Merge tag 'bootconfig-fixes-v6.9-rc4' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 19 Apr 2024 16:52:09 +0000 (09:52 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 19 Apr 2024 16:52:09 +0000 (09:52 -0700)
Pull bootconfig fixes from Masami Hiramatsu:

 - Fix potential static_command_line buffer overrun.

   Currently we allocate the memory for static_command_line based on
   "boot_command_line", but "command_line" is what gets copied into
   it. So use the length of "command_line" instead of
   "boot_command_line" (as we previously did).

 - Use memblock_free_late() in xbc_exit() instead of memblock_free()
   after the buddy system is initialized

 - Fix a kerneldoc warning

* tag 'bootconfig-fixes-v6.9-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace:
  bootconfig: Fix the kerneldoc of _xbc_exit()
  bootconfig: use memblock_free_late to free xbc memory to buddy
  init/main.c: Fix potential static_command_line memory overflow

497 files changed:
.mailmap
CREDITS
Documentation/admin-guide/hw-vuln/spectre.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/devicetree/bindings/display/msm/qcom,sm8150-mdss.yaml
Documentation/devicetree/bindings/pwm/mediatek,pwm-disp.yaml
Documentation/driver-api/virtio/writing_virtio_drivers.rst
Documentation/filesystems/bcachefs/index.rst [new file with mode: 0644]
Documentation/filesystems/index.rst
Documentation/mm/page_owner.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi
arch/arm/boot/dts/nxp/imx/imx7s-warp.dts
arch/arm/mach-omap2/board-n8x0.c
arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi
arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi
arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi
arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi
arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi
arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi
arch/arm64/include/asm/tlbflush.h
arch/loongarch/boot/dts/loongson-2k1000.dtsi
arch/loongarch/boot/dts/loongson-2k2000-ref.dts
arch/loongarch/boot/dts/loongson-2k2000.dtsi
arch/loongarch/include/asm/addrspace.h
arch/loongarch/include/asm/io.h
arch/loongarch/include/asm/kfence.h
arch/loongarch/include/asm/page.h
arch/loongarch/mm/mmap.c
arch/loongarch/mm/pgtable.c
arch/mips/include/asm/ptrace.h
arch/mips/kernel/asm-offsets.c
arch/mips/kernel/ptrace.c
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/scall64-n32.S
arch/mips/kernel/scall64-n64.S
arch/mips/kernel/scall64-o32.S
arch/x86/Kconfig
arch/x86/entry/common.c
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/syscall_32.c
arch/x86/entry/syscall_64.c
arch/x86/entry/syscall_x32.c
arch/x86/events/core.c
arch/x86/hyperv/hv_apic.c
arch/x86/hyperv/hv_proc.c
arch/x86/include/asm/apic.h
arch/x86/include/asm/cpufeatures.h
arch/x86/include/asm/msr-index.h
arch/x86/include/asm/nospec-branch.h
arch/x86/include/asm/syscall.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/amd.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/cpu/scattered.c
arch/x86/kernel/cpu/topology.c
arch/x86/kernel/cpu/topology_amd.c
arch/x86/kvm/reverse_cpuid.h
arch/x86/kvm/vmx/vmenter.S
arch/x86/kvm/x86.c
block/blk-cgroup.c
block/blk-cgroup.h
block/blk-core.c
block/blk-iocost.c
block/blk-settings.c
drivers/accel/ivpu/ivpu_drv.c
drivers/accel/ivpu/ivpu_drv.h
drivers/accel/ivpu/ivpu_hw.h
drivers/accel/ivpu/ivpu_hw_37xx.c
drivers/accel/ivpu/ivpu_hw_40xx.c
drivers/accel/ivpu/ivpu_ipc.c
drivers/accel/ivpu/ivpu_mmu.c
drivers/accel/ivpu/ivpu_pm.c
drivers/acpi/scan.c
drivers/ata/ahci.c
drivers/ata/libata-core.c
drivers/ata/libata-scsi.c
drivers/cache/sifive_ccache.c
drivers/char/random.c
drivers/cxl/acpi.c
drivers/cxl/core/cdat.c
drivers/cxl/core/mbox.c
drivers/cxl/core/port.c
drivers/cxl/core/regs.c
drivers/cxl/cxl.h
drivers/cxl/cxlmem.h
drivers/firmware/arm_ffa/driver.c
drivers/firmware/arm_scmi/powercap.c
drivers/firmware/arm_scmi/raw_mode.c
drivers/gpio/gpio-crystalcove.c
drivers/gpio/gpio-lpc32xx.c
drivers/gpio/gpio-wcove.c
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c
drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
drivers/gpu/drm/amd/amdgpu/soc21.c
drivers/gpu/drm/amd/amdgpu/umsch_mm_v4_0.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_process.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_wb.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
drivers/gpu/drm/amd/display/dc/core/dc_state.c
drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dio_link_encoder.c
drivers/gpu/drm/amd/display/dc/dcn35/dcn35_dio_link_encoder.c
drivers/gpu/drm/amd/display/dc/optc/dcn32/dcn32_optc.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu14_driver_if_v14_0_0.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_pmfw.h
drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v14_0_0_ppsmc.h
drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0.h
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0.c
drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_0_ppt.c
drivers/gpu/drm/ast/ast_dp.c
drivers/gpu/drm/drm_client_modeset.c
drivers/gpu/drm/i915/display/intel_cdclk.c
drivers/gpu/drm/i915/display/intel_cdclk.h
drivers/gpu/drm/i915/display/intel_ddi.c
drivers/gpu/drm/i915/display/intel_dp.c
drivers/gpu/drm/i915/display/intel_dp_hdcp.c
drivers/gpu/drm/i915/display/intel_psr.c
drivers/gpu/drm/i915/display/intel_vrr.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/gt/uc/intel_uc.c
drivers/gpu/drm/msm/adreno/a6xx_gpu.c
drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
drivers/gpu/drm/msm/disp/dpu1/catalog/dpu_9_2_x1e80100.h
drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_interrupts.c
drivers/gpu/drm/msm/dp/dp_display.c
drivers/gpu/drm/msm/msm_fb.c
drivers/gpu/drm/msm/msm_kms.c
drivers/gpu/drm/nouveau/nouveau_bios.c
drivers/gpu/drm/nouveau/nouveau_dp.c
drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c
drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c
drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
drivers/gpu/drm/panel/panel-novatek-nt36672e.c
drivers/gpu/drm/panel/panel-visionox-rm69299.c
drivers/gpu/drm/panfrost/panfrost_mmu.c
drivers/gpu/drm/qxl/qxl_release.c
drivers/gpu/drm/radeon/pptable.h
drivers/gpu/drm/radeon/radeon_atombios.c
drivers/gpu/drm/ttm/ttm_pool.c
drivers/gpu/drm/v3d/v3d_irq.c
drivers/gpu/drm/vmwgfx/vmwgfx_blit.c
drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
drivers/gpu/drm/vmwgfx/vmwgfx_gem.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
drivers/gpu/drm/xe/display/intel_fb_bo.c
drivers/gpu/drm/xe/display/xe_display.c
drivers/gpu/drm/xe/regs/xe_engine_regs.h
drivers/gpu/drm/xe/xe_hwmon.c
drivers/gpu/drm/xe/xe_lrc.c
drivers/gpu/drm/xe/xe_migrate.c
drivers/gpu/drm/xe/xe_vm.c
drivers/gpu/host1x/bus.c
drivers/hv/channel.c
drivers/hv/connection.c
drivers/hv/vmbus_drv.c
drivers/iommu/amd/init.c
drivers/iommu/amd/iommu.c
drivers/iommu/intel/iommu.c
drivers/iommu/intel/perfmon.c
drivers/iommu/intel/svm.c
drivers/iommu/mtk_iommu.c
drivers/iommu/mtk_iommu_v1.c
drivers/irqchip/irq-gic-v3-its.c
drivers/isdn/mISDN/socket.c
drivers/md/raid1.c
drivers/media/platform/mediatek/vcodec/common/mtk_vcodec_fw_vpu.c
drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.c
drivers/media/platform/mediatek/vcodec/decoder/mtk_vcodec_dec_drv.h
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_hevc_req_multi_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp8_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec/vdec_vp9_req_lat_if.c
drivers/media/platform/mediatek/vcodec/decoder/vdec_vpu_if.c
drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.c
drivers/media/platform/mediatek/vcodec/encoder/mtk_vcodec_enc_drv.h
drivers/media/platform/mediatek/vcodec/encoder/venc_vpu_if.c
drivers/mmc/host/omap.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/mt7530.h
drivers/net/ethernet/amazon/ena/ena_com.c
drivers/net/ethernet/amazon/ena/ena_netdev.c
drivers/net/ethernet/amazon/ena/ena_xdp.c
drivers/net/ethernet/amd/pds_core/core.c
drivers/net/ethernet/amd/pds_core/core.h
drivers/net/ethernet/amd/pds_core/dev.c
drivers/net/ethernet/amd/pds_core/main.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
drivers/net/ethernet/intel/ice/ice_tc_lib.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
drivers/net/ethernet/marvell/octeontx2/nic/qos.c
drivers/net/ethernet/mediatek/mtk_wed.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
drivers/net/ethernet/mellanox/mlx5/core/en/qos.c
drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.c
drivers/net/ethernet/mellanox/mlx5/core/en/rqt.h
drivers/net/ethernet/mellanox/mlx5/core/en/selq.c
drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c
drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c
drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c
drivers/net/ethernet/micrel/ks8851.h
drivers/net/ethernet/micrel/ks8851_common.c
drivers/net/ethernet/micrel/ks8851_par.c
drivers/net/ethernet/micrel/ks8851_spi.c
drivers/net/ethernet/microchip/sparx5/sparx5_port.c
drivers/net/ethernet/microchip/sparx5/sparx5_tc_flower.c
drivers/net/ethernet/realtek/r8169.h
drivers/net/ethernet/realtek/r8169_leds.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/stmicro/stmmac/common.h
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
drivers/net/ethernet/stmicro/stmmac/mmc.h
drivers/net/ethernet/stmicro/stmmac/mmc_core.c
drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/ti/am65-cpsw-nuss.c
drivers/net/geneve.c
drivers/net/hyperv/netvsc.c
drivers/net/tun.c
drivers/net/usb/ax88179_178a.c
drivers/net/usb/qmi_wwan.c
drivers/net/virtio_net.c
drivers/pci/quirks.c
drivers/platform/chrome/cros_ec_uart.c
drivers/platform/x86/acer-wmi.c
drivers/platform/x86/amd/pmc/pmc-quirks.c
drivers/platform/x86/amd/pmf/Makefile
drivers/platform/x86/amd/pmf/acpi.c
drivers/platform/x86/amd/pmf/core.c
drivers/platform/x86/amd/pmf/pmf-quirks.c [new file with mode: 0644]
drivers/platform/x86/amd/pmf/pmf.h
drivers/platform/x86/intel/hid.c
drivers/platform/x86/intel/speed_select_if/isst_if_common.c
drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c
drivers/platform/x86/intel/vbtn.c
drivers/platform/x86/lg-laptop.c
drivers/platform/x86/toshiba_acpi.c
drivers/pwm/pwm-dwc-core.c
drivers/pwm/pwm-dwc.c
drivers/pwm/pwm-dwc.h
drivers/s390/net/ism_drv.c
drivers/scsi/hisi_sas/hisi_sas_main.c
drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
drivers/scsi/qla2xxx/qla_edif.c
drivers/scsi/scsi_lib.c
drivers/scsi/sg.c
drivers/target/target_core_configfs.c
drivers/thermal/thermal_debugfs.c
drivers/ufs/host/ufs-qcom.c
drivers/uio/uio_hv_generic.c
drivers/vhost/vhost.c
drivers/virt/vmgenid.c
drivers/virtio/virtio.c
fs/bcachefs/acl.c
fs/bcachefs/backpointers.c
fs/bcachefs/backpointers.h
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bkey.h
fs/bcachefs/bkey_methods.c
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_io.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_journal_iter.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_locking.c
fs/bcachefs/btree_node_scan.c
fs/bcachefs/btree_trans_commit.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/btree_write_buffer.c
fs/bcachefs/buckets.h
fs/bcachefs/chardev.c
fs/bcachefs/checksum.c
fs/bcachefs/checksum.h
fs/bcachefs/compress.h
fs/bcachefs/data_update.c
fs/bcachefs/debug.c
fs/bcachefs/ec.c
fs/bcachefs/ec.h
fs/bcachefs/extents.c
fs/bcachefs/eytzinger.c
fs/bcachefs/eytzinger.h
fs/bcachefs/fs-io-direct.c
fs/bcachefs/fs-io.c
fs/bcachefs/journal_io.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_types.h
fs/bcachefs/opts.c
fs/bcachefs/opts.h
fs/bcachefs/recovery.c
fs/bcachefs/recovery_passes.c
fs/bcachefs/sb-downgrade.c
fs/bcachefs/sb-errors_types.h
fs/bcachefs/sb-members.c
fs/bcachefs/sb-members.h
fs/bcachefs/snapshot.c
fs/bcachefs/super-io.c
fs/bcachefs/super.c
fs/bcachefs/super_types.h
fs/bcachefs/sysfs.c
fs/bcachefs/tests.c
fs/bcachefs/util.h
fs/btrfs/delayed-inode.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/qgroup.c
fs/btrfs/root-tree.c
fs/btrfs/root-tree.h
fs/btrfs/transaction.c
fs/ceph/addr.c
fs/ceph/caps.c
fs/ceph/mds_client.c
fs/ceph/mds_client.h
fs/kernfs/file.c
fs/nfsd/nfs4xdr.c
fs/nilfs2/dir.c
fs/smb/client/cached_dir.c
fs/smb/client/cifsglob.h
fs/smb/client/connect.c
fs/smb/client/fs_context.c
fs/smb/client/fs_context.h
fs/smb/client/inode.c
fs/smb/client/misc.c
fs/smb/client/smb2ops.c
fs/smb/client/smb2pdu.c
fs/squashfs/inode.c
fs/tracefs/event_inode.c
fs/zonefs/super.c
include/acpi/acpi_bus.h
include/asm-generic/bug.h
include/asm-generic/hyperv-tlfs.h
include/asm-generic/mshyperv.h
include/linux/compiler.h
include/linux/dma-fence.h
include/linux/gfp_types.h
include/linux/gpio/property.h
include/linux/hyperv.h
include/linux/io_uring_types.h
include/linux/irqflags.h
include/linux/mm.h
include/linux/randomize_kstack.h
include/linux/rwbase_rt.h
include/linux/rwsem.h
include/linux/shmem_fs.h
include/linux/sockptr.h
include/linux/swapops.h
include/linux/u64_stats_sync.h
include/linux/udp.h
include/linux/virtio.h
include/net/addrconf.h
include/net/bluetooth/bluetooth.h
include/net/ip_tunnels.h
include/net/netfilter/nf_flow_table.h
include/net/netfilter/nf_tables.h
include/net/sch_generic.h
include/trace/events/rpcgss.h
include/uapi/linux/vhost.h
io_uring/io_uring.c
io_uring/net.c
kernel/cpu.c
kernel/dma/swiotlb.c
kernel/fork.c
kernel/kprobes.c
kernel/power/suspend.c
kernel/time/tick-common.c
kernel/time/tick-sched.c
kernel/trace/Kconfig
kernel/trace/ring_buffer.c
kernel/trace/trace_events.c
lib/checksum_kunit.c
lib/test_ubsan.c
mm/gup.c
mm/huge_memory.c
mm/hugetlb.c
mm/internal.h
mm/madvise.c
mm/memory-failure.c
mm/page_owner.c
mm/shmem.c
net/batman-adv/translation-table.c
net/bluetooth/hci_request.c
net/bluetooth/hci_sock.c
net/bluetooth/hci_sync.c
net/bluetooth/iso.c
net/bluetooth/l2cap_core.c
net/bluetooth/l2cap_sock.c
net/bluetooth/rfcomm/sock.c
net/bluetooth/sco.c
net/bridge/br_input.c
net/bridge/br_netfilter_hooks.c
net/bridge/br_private.h
net/bridge/netfilter/nf_conntrack_bridge.c
net/core/dev.c
net/ipv4/fib_frontend.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/route.c
net/ipv6/addrconf.c
net/ipv6/ip6_fib.c
net/ipv6/netfilter/ip6_tables.c
net/netfilter/nf_flow_table_inet.c
net/netfilter/nf_flow_table_ip.c
net/netfilter/nf_tables_api.c
net/netfilter/nft_lookup.c
net/netfilter/nft_set_bitmap.c
net/netfilter/nft_set_hash.c
net/netfilter/nft_set_pipapo.c
net/netfilter/nft_set_rbtree.c
net/nfc/llcp_sock.c
net/openvswitch/conntrack.c
net/sched/sch_generic.c
net/unix/af_unix.c
net/unix/garbage.c
net/xdp/xsk.c
scripts/gcc-plugins/stackleak_plugin.c
sound/core/seq/seq_ump_convert.c
sound/pci/hda/patch_realtek.c
sound/pci/hda/tas2781_hda_i2c.c
tools/hv/hv_kvp_daemon.c
tools/include/linux/kernel.h
tools/include/linux/mm.h
tools/include/linux/panic.h [new file with mode: 0644]
tools/power/x86/turbostat/turbostat.8
tools/power/x86/turbostat/turbostat.c
tools/testing/cxl/test/cxl.c
tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
tools/testing/selftests/kselftest.h
tools/testing/selftests/kselftest_harness.h
tools/testing/selftests/net/tcp_ao/lib/proc.c
tools/testing/selftests/net/tcp_ao/lib/setup.c
tools/testing/selftests/net/tcp_ao/rst.c
tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
tools/testing/selftests/net/udpgso.c
tools/testing/selftests/timers/posix_timers.c
tools/testing/selftests/timers/valid-adjtimex.c
tools/testing/selftests/turbostat/defcolumns.py [new file with mode: 0755]

index 8284692f9610715fa2bc04f8771cb45d1b5ebf88..625b496bf5f45100b8c563349eb077059cfa0138 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -446,7 +446,8 @@ Mythri P K <mythripk@ti.com>
 Nadav Amit <nadav.amit@gmail.com> <namit@vmware.com>
 Nadav Amit <nadav.amit@gmail.com> <namit@cs.technion.ac.il>
 Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
-Naoya Horiguchi <naoya.horiguchi@nec.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <n-horiguchi@ah.jp.nec.com>
+Naoya Horiguchi <nao.horiguchi@gmail.com> <naoya.horiguchi@nec.com>
 Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
 Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
 Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
diff --git a/CREDITS b/CREDITS
index c55c5a0ee4ff65e244eb3a9de9aeb35515bc2381..0107047f807bfc01a0c5e7ad380e15a5ddc95776 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -3146,6 +3146,10 @@ S: Triftstraße 55
 S: 13353 Berlin
 S: Germany
 
+N: Gustavo Pimentel
+E: gustavo.pimentel@synopsys.com
+D: PCI driver for Synopsys DesignWare
+
 N: Emanuel Pirker
 E: epirker@edu.uni-klu.ac.at
 D: AIC5800 IEEE 1394, RAW I/O on 1394
index cce768afec6bed11a961643dcdc2d1ae97848684..25a04cda4c2c054864fa1792d98d9f095ea56a17 100644 (file)
@@ -138,11 +138,10 @@ associated with the source address of the indirect branch. Specifically,
 the BHB might be shared across privilege levels even in the presence of
 Enhanced IBRS.
 
-Currently the only known real-world BHB attack vector is via
-unprivileged eBPF. Therefore, it's highly recommended to not enable
-unprivileged eBPF, especially when eIBRS is used (without retpolines).
-For a full mitigation against BHB attacks, it's recommended to use
-retpolines (or eIBRS combined with retpolines).
+Previously the only known real-world BHB attack vector was via unprivileged
+eBPF. Further research has found attacks that don't require unprivileged eBPF.
+For a full mitigation against BHB attacks it is recommended to set BHI_DIS_S or
+use the BHB clearing sequence.
 
 Attack scenarios
 ----------------
@@ -430,6 +429,23 @@ The possible values in this file are:
   'PBRSB-eIBRS: Not affected'  CPU is not affected by PBRSB
   ===========================  =======================================================
 
+  - Branch History Injection (BHI) protection status:
+
+.. list-table::
+
+ * - BHI: Not affected
+   - System is not affected
+ * - BHI: Retpoline
+   - System is protected by retpoline
+ * - BHI: BHI_DIS_S
+   - System is protected by BHI_DIS_S
+ * - BHI: SW loop, KVM SW loop
+   - System is protected by software clearing sequence
+ * - BHI: Vulnerable
+   - System is vulnerable to BHI
+ * - BHI: Vulnerable, KVM: SW loop
+   - System is vulnerable; KVM is protected by software clearing sequence
+
 Full mitigation might require a microcode update from the CPU
 vendor. When the necessary microcode is not available, the kernel will
 report vulnerability.
@@ -484,7 +500,11 @@ Spectre variant 2
 
    Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
    boot, by setting the IBRS bit, and they're automatically protected against
-   Spectre v2 variant attacks.
+   some Spectre v2 variant attacks. The BHB can still influence the choice of
+   indirect branch predictor entry, and although branch predictor entries are
+   isolated between modes when eIBRS is enabled, the BHB itself is not isolated
+   between modes. Systems which support BHI_DIS_S will set it to protect against
+   BHI attacks.
 
    On Intel's enhanced IBRS systems, this includes cross-thread branch target
    injections on SMT systems (STIBP). In other words, Intel eIBRS enables
@@ -638,6 +658,18 @@ kernel command line.
                spectre_v2=off. Spectre variant 1 mitigations
                cannot be disabled.
 
+       spectre_bhi=
+
+               [X86] Control mitigation of Branch History Injection
+               (BHI) vulnerability.  This setting affects the deployment
+               of the HW BHI control and the SW BHB clearing sequence.
+
+               on
+                       (default) Enable the HW or SW mitigation as
+                       needed.
+               off
+                       Disable the mitigation.
+
 For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
 
 Mitigation selection guide
index 623fce7d5fcd0c4392432908e21aaba5134e3aa0..902ecd92a29fbe83df18d32d1a8fe652c8277132 100644 (file)
                                               retbleed=off [X86]
                                               spec_rstack_overflow=off [X86]
                                               spec_store_bypass_disable=off [X86,PPC]
+                                              spectre_bhi=off [X86]
                                               spectre_v2_user=off [X86]
                                               srbds=off [X86,INTEL]
                                               ssbd=force-off [ARM64]
        sonypi.*=       [HW] Sony Programmable I/O Control Device driver
                        See Documentation/admin-guide/laptops/sonypi.rst
 
+       spectre_bhi=    [X86] Control mitigation of Branch History Injection
+                       (BHI) vulnerability.  This setting affects the
+                       deployment of the HW BHI control and the SW BHB
+                       clearing sequence.
+
+                       on   - (default) Enable the HW or SW mitigation
+                              as needed.
+                       off  - Disable the mitigation.
+
        spectre_v2=     [X86,EARLY] Control mitigation of Spectre variant 2
                        (indirect branch speculation) vulnerability.
                        The default operation protects the kernel from
index c0d6a4fdff97e37f31ecc763347497aea9450780..e6dc5494baee29a7171c11ac074159e6a08f8627 100644 (file)
@@ -53,6 +53,15 @@ patternProperties:
       compatible:
         const: qcom,sm8150-dpu
 
+  "^displayport-controller@[0-9a-f]+$":
+    type: object
+    additionalProperties: true
+
+    properties:
+      compatible:
+        contains:
+          const: qcom,sm8150-dp
+
   "^dsi@[0-9a-f]+$":
     type: object
     additionalProperties: true
index afcdeed4e88af625ea4f0f371cc11ffdbe824859..bc813fe74faba5ae50bc81ecb2f75f9e1d8803c9 100644 (file)
@@ -52,6 +52,9 @@ properties:
       - const: main
       - const: mm
 
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg
index e14c58796d250116107041b1be3e40aafa564656..e5de6f5d061a7c2162bc6fac628e542389602be3 100644 (file)
@@ -97,7 +97,6 @@ like this::
 
        static struct virtio_driver virtio_dummy_driver = {
                .driver.name =  KBUILD_MODNAME,
-               .driver.owner = THIS_MODULE,
                .id_table =     id_table,
                .probe =        virtio_dummy_probe,
                .remove =       virtio_dummy_remove,
diff --git a/Documentation/filesystems/bcachefs/index.rst b/Documentation/filesystems/bcachefs/index.rst
new file mode 100644 (file)
index 0000000..e2bd61c
--- /dev/null
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+bcachefs Documentation
+======================
+
+.. toctree::
+   :maxdepth: 2
+   :numbered:
+
+   errorcodes
index 0ea1e44fa02823ffd51f4739a3a9aab635a35bbe..1f9b4c905a6a7c0646fca9764829151582eb6e7c 100644 (file)
@@ -69,6 +69,7 @@ Documentation for filesystem implementations.
    afs
    autofs
    autofs-mount-control
+   bcachefs/index
    befs
    bfs
    btrfs
index 0d0334cd51798b63af73cb86f891c07e1c7e587c..3a45a20fc05a1f90a67b5b61e6bbb654145928a3 100644 (file)
@@ -24,10 +24,10 @@ fragmentation statistics can be obtained through gfp flag information of
 each page. It is already implemented and activated if page owner is
 enabled. Other usages are more than welcome.
 
-It can also be used to show all the stacks and their outstanding
-allocations, which gives us a quick overview of where the memory is going
-without the need to screen through all the pages and match the allocation
-and free operation.
+It can also be used to show all the stacks and their current number of
+allocated base pages, which gives us a quick overview of where the memory
+is going without the need to screen through all the pages and match the
+allocation and free operation.
 
 page owner is disabled by default. So, if you'd like to use it, you need
 to add "page_owner=on" to your boot cmdline. If the kernel is built
@@ -75,42 +75,45 @@ Usage
 
        cat /sys/kernel/debug/page_owner_stacks/show_stacks > stacks.txt
        cat stacks.txt
-        prep_new_page+0xa9/0x120
-        get_page_from_freelist+0x7e6/0x2140
-        __alloc_pages+0x18a/0x370
-        new_slab+0xc8/0x580
-        ___slab_alloc+0x1f2/0xaf0
-        __slab_alloc.isra.86+0x22/0x40
-        kmem_cache_alloc+0x31b/0x350
-        __khugepaged_enter+0x39/0x100
-        dup_mmap+0x1c7/0x5ce
-        copy_process+0x1afe/0x1c90
-        kernel_clone+0x9a/0x3c0
-        __do_sys_clone+0x66/0x90
-        do_syscall_64+0x7f/0x160
-        entry_SYSCALL_64_after_hwframe+0x6c/0x74
-       stack_count: 234
+        post_alloc_hook+0x177/0x1a0
+        get_page_from_freelist+0xd01/0xd80
+        __alloc_pages+0x39e/0x7e0
+        allocate_slab+0xbc/0x3f0
+        ___slab_alloc+0x528/0x8a0
+        kmem_cache_alloc+0x224/0x3b0
+        sk_prot_alloc+0x58/0x1a0
+        sk_alloc+0x32/0x4f0
+        inet_create+0x427/0xb50
+        __sock_create+0x2e4/0x650
+        inet_ctl_sock_create+0x30/0x180
+        igmp_net_init+0xc1/0x130
+        ops_init+0x167/0x410
+        setup_net+0x304/0xa60
+        copy_net_ns+0x29b/0x4a0
+        create_new_namespaces+0x4a1/0x820
+       nr_base_pages: 16
        ...
        ...
        echo 7000 > /sys/kernel/debug/page_owner_stacks/count_threshold
        cat /sys/kernel/debug/page_owner_stacks/show_stacks> stacks_7000.txt
        cat stacks_7000.txt
-        prep_new_page+0xa9/0x120
-        get_page_from_freelist+0x7e6/0x2140
-        __alloc_pages+0x18a/0x370
-        alloc_pages_mpol+0xdf/0x1e0
-        folio_alloc+0x14/0x50
-        filemap_alloc_folio+0xb0/0x100
-        page_cache_ra_unbounded+0x97/0x180
-        filemap_fault+0x4b4/0x1200
-        __do_fault+0x2d/0x110
-        do_pte_missing+0x4b0/0xa30
-        __handle_mm_fault+0x7fa/0xb70
-        handle_mm_fault+0x125/0x300
-        do_user_addr_fault+0x3c9/0x840
-        exc_page_fault+0x68/0x150
-        asm_exc_page_fault+0x22/0x30
-       stack_count: 8248
+        post_alloc_hook+0x177/0x1a0
+        get_page_from_freelist+0xd01/0xd80
+        __alloc_pages+0x39e/0x7e0
+        alloc_pages_mpol+0x22e/0x490
+        folio_alloc+0xd5/0x110
+        filemap_alloc_folio+0x78/0x230
+        page_cache_ra_order+0x287/0x6f0
+        filemap_get_pages+0x517/0x1160
+        filemap_read+0x304/0x9f0
+        xfs_file_buffered_read+0xe6/0x1d0 [xfs]
+        xfs_file_read_iter+0x1f0/0x380 [xfs]
+        __kernel_read+0x3b9/0x730
+        kernel_read_file+0x309/0x4d0
+        __do_sys_finit_module+0x381/0x730
+        do_syscall_64+0x8d/0x150
+        entry_SYSCALL_64_after_hwframe+0x62/0x6a
+       nr_base_pages: 20824
        ...
 
        cat /sys/kernel/debug/page_owner > page_owner_full.txt
index aea47e04c3a52aa6774a090c2bd17555306c4a02..ee9cc2b40409ea90bf9b483df91e3690b82a3f68 100644 (file)
@@ -2191,7 +2191,6 @@ N:        mxs
 
 ARM/FREESCALE LAYERSCAPE ARM ARCHITECTURE
 M:     Shawn Guo <shawnguo@kernel.org>
-M:     Li Yang <leoyang.li@nxp.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git
@@ -2708,7 +2707,7 @@ F:        sound/soc/rockchip/
 N:     rockchip
 
 ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 R:     Alim Akhtar <alim.akhtar@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-samsung-soc@vger.kernel.org
@@ -3573,6 +3572,7 @@ S:        Supported
 C:     irc://irc.oftc.net/bcache
 T:     git https://evilpiepirate.org/git/bcachefs.git
 F:     fs/bcachefs/
+F:     Documentation/filesystems/bcachefs/
 
 BDISP ST MEDIA DRIVER
 M:     Fabien Dessenne <fabien.dessenne@foss.st.com>
@@ -4869,7 +4869,6 @@ F:        drivers/power/supply/cw2015_battery.c
 CEPH COMMON CODE (LIBCEPH)
 M:     Ilya Dryomov <idryomov@gmail.com>
 M:     Xiubo Li <xiubli@redhat.com>
-R:     Jeff Layton <jlayton@kernel.org>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
@@ -4881,7 +4880,6 @@ F:        net/ceph/
 CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
 M:     Xiubo Li <xiubli@redhat.com>
 M:     Ilya Dryomov <idryomov@gmail.com>
-R:     Jeff Layton <jlayton@kernel.org>
 L:     ceph-devel@vger.kernel.org
 S:     Supported
 W:     http://ceph.com/
@@ -5557,7 +5555,7 @@ F:        drivers/cpuidle/cpuidle-big_little.c
 CPUIDLE DRIVER - ARM EXYNOS
 M:     Daniel Lezcano <daniel.lezcano@linaro.org>
 M:     Kukjin Kim <kgene@kernel.org>
-R:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-pm@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -8523,7 +8521,6 @@ S:        Maintained
 F:     drivers/video/fbdev/fsl-diu-fb.*
 
 FREESCALE DMA DRIVER
-M:     Li Yang <leoyang.li@nxp.com>
 M:     Zhang Wei <zw@zh-kernel.org>
 L:     linuxppc-dev@lists.ozlabs.org
 S:     Maintained
@@ -8688,10 +8685,9 @@ F:       drivers/soc/fsl/qe/tsa.h
 F:     include/dt-bindings/soc/cpm1-fsl,tsa.h
 
 FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
-M:     Li Yang <leoyang.li@nxp.com>
 L:     netdev@vger.kernel.org
 L:     linuxppc-dev@lists.ozlabs.org
-S:     Maintained
+S:     Orphan
 F:     drivers/net/ethernet/freescale/ucc_geth*
 
 FREESCALE QUICC ENGINE UCC HDLC DRIVER
@@ -8708,10 +8704,9 @@ S:       Maintained
 F:     drivers/tty/serial/ucc_uart.c
 
 FREESCALE SOC DRIVERS
-M:     Li Yang <leoyang.li@nxp.com>
 L:     linuxppc-dev@lists.ozlabs.org
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S:     Maintained
+S:     Orphan
 F:     Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
 F:     Documentation/devicetree/bindings/soc/fsl/
 F:     drivers/soc/fsl/
@@ -8745,10 +8740,9 @@ F:       Documentation/devicetree/bindings/sound/fsl,qmc-audio.yaml
 F:     sound/soc/fsl/fsl_qmc_audio.c
 
 FREESCALE USB PERIPHERAL DRIVERS
-M:     Li Yang <leoyang.li@nxp.com>
 L:     linux-usb@vger.kernel.org
 L:     linuxppc-dev@lists.ozlabs.org
-S:     Maintained
+S:     Orphan
 F:     drivers/usb/gadget/udc/fsl*
 
 FREESCALE USB PHY DRIVER
@@ -9000,7 +8994,7 @@ F:        drivers/i2c/muxes/i2c-mux-gpio.c
 F:     include/linux/platform_data/i2c-mux-gpio.h
 
 GENERIC GPIO RESET DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 S:     Maintained
 F:     drivers/reset/reset-gpio.c
 
@@ -10030,7 +10024,7 @@ F:      drivers/media/platform/st/sti/hva
 
 HWPOISON MEMORY FAILURE HANDLING
 M:     Miaohe Lin <linmiaohe@huawei.com>
-R:     Naoya Horiguchi <naoya.horiguchi@nec.com>
+R:     Naoya Horiguchi <nao.horiguchi@gmail.com>
 L:     linux-mm@kvack.org
 S:     Maintained
 F:     mm/hwpoison-inject.c
@@ -13295,7 +13289,7 @@ F:      drivers/iio/adc/max11205.c
 
 MAXIM MAX17040 FAMILY FUEL GAUGE DRIVERS
 R:     Iskren Chernev <iskren.chernev@gmail.com>
-R:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R:     Krzysztof Kozlowski <krzk@kernel.org>
 R:     Marek Szyprowski <m.szyprowski@samsung.com>
 R:     Matheus Castello <matheus@castello.eng.br>
 L:     linux-pm@vger.kernel.org
@@ -13305,7 +13299,7 @@ F:      drivers/power/supply/max17040_battery.c
 
 MAXIM MAX17042 FAMILY FUEL GAUGE DRIVERS
 R:     Hans de Goede <hdegoede@redhat.com>
-R:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+R:     Krzysztof Kozlowski <krzk@kernel.org>
 R:     Marek Szyprowski <m.szyprowski@samsung.com>
 R:     Sebastian Krzyszkowiak <sebastian.krzyszkowiak@puri.sm>
 R:     Purism Kernel Team <kernel@puri.sm>
@@ -13363,7 +13357,7 @@ F:      Documentation/devicetree/bindings/power/supply/maxim,max77976.yaml
 F:     drivers/power/supply/max77976_charger.c
 
 MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
 B:     mailto:linux-samsung-soc@vger.kernel.org
@@ -13374,7 +13368,7 @@ F:      drivers/power/supply/max77693_charger.c
 
 MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS
 M:     Chanwoo Choi <cw00.choi@samsung.com>
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 B:     mailto:linux-samsung-soc@vger.kernel.org
@@ -14158,7 +14152,7 @@ F:      mm/mm_init.c
 F:     tools/testing/memblock/
 
 MEMORY CONTROLLER DRIVERS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 B:     mailto:krzysztof.kozlowski@linaro.org
@@ -15539,7 +15533,7 @@ F:      include/uapi/linux/nexthop.h
 F:     net/ipv4/nexthop.c
 
 NFC SUBSYSTEM
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     netdev@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/nfc/
@@ -15916,7 +15910,7 @@ F:      Documentation/devicetree/bindings/regulator/nxp,pf8x00-regulator.yaml
 F:     drivers/regulator/pf8x00-regulator.c
 
 NXP PTN5150A CC LOGIC AND EXTCON DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml
@@ -16527,7 +16521,7 @@ K:      of_overlay_remove
 
 OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
 M:     Rob Herring <robh@kernel.org>
-M:     Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
+M:     Krzysztof Kozlowski <krzk+dt@kernel.org>
 M:     Conor Dooley <conor+dt@kernel.org>
 L:     devicetree@vger.kernel.org
 S:     Maintained
@@ -16974,7 +16968,6 @@ F:      drivers/pci/controller/dwc/pci-exynos.c
 
 PCI DRIVER FOR SYNOPSYS DESIGNWARE
 M:     Jingoo Han <jingoohan1@gmail.com>
-M:     Gustavo Pimentel <gustavo.pimentel@synopsys.com>
 M:     Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
 L:     linux-pci@vger.kernel.org
 S:     Maintained
@@ -17485,7 +17478,7 @@ F:      Documentation/devicetree/bindings/pinctrl/renesas,*
 F:     drivers/pinctrl/renesas/
 
 PIN CONTROLLER - SAMSUNG
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 R:     Alim Akhtar <alim.akhtar@samsung.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -19453,7 +19446,7 @@ F:      Documentation/devicetree/bindings/sound/samsung*
 F:     sound/soc/samsung/
 
 SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-crypto@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -19488,7 +19481,7 @@ S:      Maintained
 F:     drivers/platform/x86/samsung-laptop.c
 
 SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-kernel@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -19514,7 +19507,7 @@ F:      drivers/media/platform/samsung/s3c-camif/
 F:     include/media/drv-intf/s3c_camif.h
 
 SAMSUNG S3FWRN5 NFC DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml
 F:     drivers/nfc/s3fwrn5
@@ -19535,7 +19528,7 @@ S:      Supported
 F:     drivers/media/i2c/s5k5baf.c
 
 SAMSUNG S5P Security SubSystem (SSS) DRIVER
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 M:     Vladimir Zapolskiy <vz@mleia.com>
 L:     linux-crypto@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
@@ -19557,7 +19550,7 @@ F:      Documentation/devicetree/bindings/media/samsung,fimc.yaml
 F:     drivers/media/platform/samsung/exynos4-is/
 
 SAMSUNG SOC CLOCK DRIVERS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
 M:     Chanwoo Choi <cw00.choi@samsung.com>
 R:     Alim Akhtar <alim.akhtar@samsung.com>
@@ -19589,7 +19582,7 @@ F:      drivers/net/ethernet/samsung/sxgbe/
 
 SAMSUNG THERMAL DRIVER
 M:     Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 L:     linux-pm@vger.kernel.org
 L:     linux-samsung-soc@vger.kernel.org
 S:     Maintained
@@ -22577,6 +22570,7 @@ Q:      https://patchwork.kernel.org/project/linux-pm/list/
 B:     https://bugzilla.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat
 F:     tools/power/x86/turbostat/
+F:     tools/testing/selftests/turbostat/
 
 TW5864 VIDEO4LINUX DRIVER
 M:     Bluecherry Maintainers <maintainers@bluecherrydvr.com>
@@ -23785,7 +23779,7 @@ S:      Orphan
 F:     drivers/mmc/host/vub300.c
 
 W1 DALLAS'S 1-WIRE BUS
-M:     Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+M:     Krzysztof Kozlowski <krzk@kernel.org>
 S:     Maintained
 F:     Documentation/devicetree/bindings/w1/
 F:     Documentation/w1/
index e1bf12891cb0e4a7471d60cf4b2eb0050d600d2f..59d8a7f95d0a863e4308853f9e01baa9fb8fed84 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 9
 SUBLEVEL = 0
-EXTRAVERSION = -rc3
+EXTRAVERSION = -rc4
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*
index 9f066785bb71d93ca5da01a22d15ed2effba5901..65afb1de48b36e843bd665a91df6b33badfd23d1 100644 (file)
@@ -1172,12 +1172,12 @@ config PAGE_SIZE_LESS_THAN_256KB
 
 config PAGE_SHIFT
        int
-       default 12 if PAGE_SIZE_4KB
-       default 13 if PAGE_SIZE_8KB
-       default 14 if PAGE_SIZE_16KB
-       default 15 if PAGE_SIZE_32KB
-       default 16 if PAGE_SIZE_64KB
-       default 18 if PAGE_SIZE_256KB
+       default 12 if PAGE_SIZE_4KB
+       default 13 if PAGE_SIZE_8KB
+       default 14 if PAGE_SIZE_16KB
+       default 15 if PAGE_SIZE_32KB
+       default 16 if PAGE_SIZE_64KB
+       default 18 if PAGE_SIZE_256KB
 
 # This allows to use a set of generic functions to determine mmap base
 # address by giving priority to top-down scheme only if the process
index 1235a71c6abe96564059010e214f87304d7d4e8c..52869e68f833c4d8f7cefdcefeadba9b8b78f87a 100644 (file)
        bus-width = <4>;
        no-1-8-v;
        no-sdio;
-       no-emmc;
+       no-mmc;
        status = "okay";
 };
 
index ba7231b364bb8c76296e953bbfa450bc49c1293a..7bab113ca6da79ed3941e7d6550fecfd31687f25 100644 (file)
                                remote-endpoint = <&mipi_from_sensor>;
                                clock-lanes = <0>;
                                data-lanes = <1>;
+                               link-frequencies = /bits/ 64 <330000000>;
                        };
                };
        };
index 31755a378c7364b5b5a055fa59b8796600a898a9..ff2a4a4d822047168008446e6c1835bd7358e789 100644 (file)
@@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = {
 static struct gpiod_lookup_table tusb_gpio_table = {
        .dev_id = "musb-tusb",
        .table = {
-               GPIO_LOOKUP("gpio-0-15", 0, "enable",
-                           GPIO_ACTIVE_HIGH),
-               GPIO_LOOKUP("gpio-48-63", 10, "int",
-                           GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH),
                { }
        },
 };
@@ -140,12 +138,11 @@ static int slot1_cover_open;
 static int slot2_cover_open;
 static struct device *mmc_device;
 
-static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
+static struct gpiod_lookup_table nokia800_mmc_gpio_table = {
        .dev_id = "mmci-omap.0",
        .table = {
                /* Slot switch, GPIO 96 */
-               GPIO_LOOKUP("gpio-80-111", 16,
-                           "switch", GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
                { }
        },
 };
@@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = {
 static struct gpiod_lookup_table nokia810_mmc_gpio_table = {
        .dev_id = "mmci-omap.0",
        .table = {
+               /* Slot switch, GPIO 96 */
+               GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH),
                /* Slot index 1, VSD power, GPIO 23 */
-               GPIO_LOOKUP_IDX("gpio-16-31", 7,
-                               "vsd", 1, GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH),
                /* Slot index 1, VIO power, GPIO 9 */
-               GPIO_LOOKUP_IDX("gpio-0-15", 9,
-                               "vio", 1, GPIO_ACTIVE_HIGH),
+               GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH),
                { }
        },
 };
@@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC];
 
 static void __init n8x0_mmc_init(void)
 {
-       gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table);
-
        if (board_is_n810()) {
                mmc1_data.slots[0].name = "external";
 
@@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void)
                mmc1_data.slots[1].name = "internal";
                mmc1_data.slots[1].ban_openended = 1;
                gpiod_add_lookup_table(&nokia810_mmc_gpio_table);
+       } else {
+               gpiod_add_lookup_table(&nokia800_mmc_gpio_table);
        }
 
        mmc1_data.nr_slots = 2;
index 3c42240e78e245fe54ab5c637d9fa071dc2c0b34..4aaf5a0c1ed8af6f7f845be079c9297f35d2d72b 100644 (file)
@@ -41,7 +41,7 @@ conn_subsys: bus@5b000000 {
                interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>;
                fsl,usbphy = <&usbphy1>;
                fsl,usbmisc = <&usbmisc1 0>;
-               clocks = <&usb2_lpcg 0>;
+               clocks = <&usb2_lpcg IMX_LPCG_CLK_6>;
                ahb-burst-config = <0x0>;
                tx-burst-size-dword = <0x10>;
                rx-burst-size-dword = <0x10>;
@@ -58,7 +58,7 @@ conn_subsys: bus@5b000000 {
        usbphy1: usbphy@5b100000 {
                compatible = "fsl,imx7ulp-usbphy";
                reg = <0x5b100000 0x1000>;
-               clocks = <&usb2_lpcg 1>;
+               clocks = <&usb2_lpcg IMX_LPCG_CLK_7>;
                power-domains = <&pd IMX_SC_R_USB_0_PHY>;
                status = "disabled";
        };
@@ -67,8 +67,8 @@ conn_subsys: bus@5b000000 {
                interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>;
                reg = <0x5b010000 0x10000>;
                clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>,
-                        <&sdhc0_lpcg IMX_LPCG_CLK_0>,
-                        <&sdhc0_lpcg IMX_LPCG_CLK_5>;
+                        <&sdhc0_lpcg IMX_LPCG_CLK_5>,
+                        <&sdhc0_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "ahb", "per";
                power-domains = <&pd IMX_SC_R_SDHC_0>;
                status = "disabled";
@@ -78,8 +78,8 @@ conn_subsys: bus@5b000000 {
                interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>;
                reg = <0x5b020000 0x10000>;
                clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>,
-                        <&sdhc1_lpcg IMX_LPCG_CLK_0>,
-                        <&sdhc1_lpcg IMX_LPCG_CLK_5>;
+                        <&sdhc1_lpcg IMX_LPCG_CLK_5>,
+                        <&sdhc1_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "ahb", "per";
                power-domains = <&pd IMX_SC_R_SDHC_1>;
                fsl,tuning-start-tap = <20>;
@@ -91,8 +91,8 @@ conn_subsys: bus@5b000000 {
                interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>;
                reg = <0x5b030000 0x10000>;
                clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>,
-                        <&sdhc2_lpcg IMX_LPCG_CLK_0>,
-                        <&sdhc2_lpcg IMX_LPCG_CLK_5>;
+                        <&sdhc2_lpcg IMX_LPCG_CLK_5>,
+                        <&sdhc2_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "ahb", "per";
                power-domains = <&pd IMX_SC_R_SDHC_2>;
                status = "disabled";
index cab3468b1875ee885f32a842f92d56cc0b744998..f7a91d43a0ffe10e85e2b1e71ff6751c314b6ef7 100644 (file)
@@ -28,8 +28,8 @@ dma_subsys: bus@5a000000 {
                #size-cells = <0>;
                interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&spi0_lpcg 0>,
-                        <&spi0_lpcg 1>;
+               clocks = <&spi0_lpcg IMX_LPCG_CLK_0>,
+                        <&spi0_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <60000000>;
@@ -44,8 +44,8 @@ dma_subsys: bus@5a000000 {
                #size-cells = <0>;
                interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&spi1_lpcg 0>,
-                        <&spi1_lpcg 1>;
+               clocks = <&spi1_lpcg IMX_LPCG_CLK_0>,
+                        <&spi1_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <60000000>;
@@ -60,8 +60,8 @@ dma_subsys: bus@5a000000 {
                #size-cells = <0>;
                interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&spi2_lpcg 0>,
-                        <&spi2_lpcg 1>;
+               clocks = <&spi2_lpcg IMX_LPCG_CLK_0>,
+                        <&spi2_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <60000000>;
@@ -76,8 +76,8 @@ dma_subsys: bus@5a000000 {
                #size-cells = <0>;
                interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&spi3_lpcg 0>,
-                        <&spi3_lpcg 1>;
+               clocks = <&spi3_lpcg IMX_LPCG_CLK_0>,
+                        <&spi3_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <60000000>;
@@ -145,8 +145,8 @@ dma_subsys: bus@5a000000 {
                compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm";
                reg = <0x5a190000 0x1000>;
                interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
-               clocks = <&adma_pwm_lpcg 1>,
-                        <&adma_pwm_lpcg 0>;
+               clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>,
+                        <&adma_pwm_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "per";
                assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
@@ -355,8 +355,8 @@ dma_subsys: bus@5a000000 {
                reg = <0x5a880000 0x10000>;
                interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&adc0_lpcg 0>,
-                        <&adc0_lpcg 1>;
+               clocks = <&adc0_lpcg IMX_LPCG_CLK_0>,
+                        <&adc0_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
@@ -370,8 +370,8 @@ dma_subsys: bus@5a000000 {
                reg = <0x5a890000 0x10000>;
                interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&adc1_lpcg 0>,
-                        <&adc1_lpcg 1>;
+               clocks = <&adc1_lpcg IMX_LPCG_CLK_0>,
+                        <&adc1_lpcg IMX_LPCG_CLK_4>;
                clock-names = "per", "ipg";
                assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
@@ -384,8 +384,8 @@ dma_subsys: bus@5a000000 {
                reg = <0x5a8d0000 0x10000>;
                interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>;
                interrupt-parent = <&gic>;
-               clocks = <&can0_lpcg 1>,
-                        <&can0_lpcg 0>;
+               clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+                        <&can0_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "per";
                assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <40000000>;
@@ -405,8 +405,8 @@ dma_subsys: bus@5a000000 {
                 * CAN1 shares CAN0's clock and to enable CAN0's clock it
                 * has to be powered on.
                 */
-               clocks = <&can0_lpcg 1>,
-                        <&can0_lpcg 0>;
+               clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+                        <&can0_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "per";
                assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <40000000>;
@@ -426,8 +426,8 @@ dma_subsys: bus@5a000000 {
                 * CAN2 shares CAN0's clock and to enable CAN0's clock it
                 * has to be powered on.
                 */
-               clocks = <&can0_lpcg 1>,
-                        <&can0_lpcg 0>;
+               clocks = <&can0_lpcg IMX_LPCG_CLK_4>,
+                        <&can0_lpcg IMX_LPCG_CLK_0>;
                clock-names = "ipg", "per";
                assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <40000000>;
index 7e510b21bbac555b38cede99f97b4edc177bf520..764c1a08e3b118841299d99a5cecb29a095e2f66 100644 (file)
@@ -25,8 +25,8 @@ lsio_subsys: bus@5d000000 {
                compatible = "fsl,imx27-pwm";
                reg = <0x5d000000 0x10000>;
                clock-names = "ipg", "per";
-               clocks = <&pwm0_lpcg 4>,
-                        <&pwm0_lpcg 1>;
+               clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>,
+                        <&pwm0_lpcg IMX_LPCG_CLK_1>;
                assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
                #pwm-cells = <3>;
@@ -38,8 +38,8 @@ lsio_subsys: bus@5d000000 {
                compatible = "fsl,imx27-pwm";
                reg = <0x5d010000 0x10000>;
                clock-names = "ipg", "per";
-               clocks = <&pwm1_lpcg 4>,
-                        <&pwm1_lpcg 1>;
+               clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>,
+                        <&pwm1_lpcg IMX_LPCG_CLK_1>;
                assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
                #pwm-cells = <3>;
@@ -51,8 +51,8 @@ lsio_subsys: bus@5d000000 {
                compatible = "fsl,imx27-pwm";
                reg = <0x5d020000 0x10000>;
                clock-names = "ipg", "per";
-               clocks = <&pwm2_lpcg 4>,
-                        <&pwm2_lpcg 1>;
+               clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>,
+                        <&pwm2_lpcg IMX_LPCG_CLK_1>;
                assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
                #pwm-cells = <3>;
@@ -64,8 +64,8 @@ lsio_subsys: bus@5d000000 {
                compatible = "fsl,imx27-pwm";
                reg = <0x5d030000 0x10000>;
                clock-names = "ipg", "per";
-               clocks = <&pwm3_lpcg 4>,
-                        <&pwm3_lpcg 1>;
+               clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>,
+                        <&pwm3_lpcg IMX_LPCG_CLK_1>;
                assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>;
                assigned-clock-rates = <24000000>;
                #pwm-cells = <3>;
index 41c79d2ebdd6201dc10278204c064a4c01c71709..f24b14744799e16bb1145738bfb18fd8343c00ee 100644 (file)
@@ -14,6 +14,7 @@
                pinctrl-0 = <&pinctrl_usbcon1>;
                type = "micro";
                label = "otg";
+               vbus-supply = <&reg_usb1_vbus>;
                id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
 
                port {
 };
 
 &usb3_phy0 {
-       vbus-supply = <&reg_usb1_vbus>;
        status = "okay";
 };
 
index d5c400b355af564123497cd1805e0b0ad56ded21..f5491a608b2f3793ca410871fda7e5005db661e1 100644 (file)
@@ -14,6 +14,7 @@
                pinctrl-0 = <&pinctrl_usbcon1>;
                type = "micro";
                label = "otg";
+               vbus-supply = <&reg_usb1_vbus>;
                id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
 
                port {
 };
 
 &usb3_phy0 {
-       vbus-supply = <&reg_usb1_vbus>;
        status = "okay";
 };
 
index 11626fae5f97f3a9b2c94528d1957fdc73f9aac8..aa9f28c4431d0249cce852026eda7a9a7cad3ff0 100644 (file)
 };
 
 &flexcan2 {
-       clocks = <&can1_lpcg 1>,
-                <&can1_lpcg 0>;
+       clocks = <&can1_lpcg IMX_LPCG_CLK_4>,
+                <&can1_lpcg IMX_LPCG_CLK_0>;
        assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>;
        fsl,clk-source = /bits/ 8 <1>;
 };
 
 &flexcan3 {
-       clocks = <&can2_lpcg 1>,
-                <&can2_lpcg 0>;
+       clocks = <&can2_lpcg IMX_LPCG_CLK_4>,
+                <&can2_lpcg IMX_LPCG_CLK_0>;
        assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>;
        fsl,clk-source = /bits/ 8 <1>;
 };
index 3b0e8248e1a41a1ead90bdbf4fea82054d9fcd90..a75de2665d844510a69d4af337ad1b5827b012c8 100644 (file)
@@ -161,12 +161,18 @@ static inline unsigned long get_trans_granule(void)
 #define MAX_TLBI_RANGE_PAGES           __TLBI_RANGE_PAGES(31, 3)
 
 /*
- * Generate 'num' values from -1 to 30 with -1 rejected by the
- * __flush_tlb_range() loop below.
+ * Generate 'num' values from -1 to 31 with -1 rejected by the
+ * __flush_tlb_range() loop below. Its return value is only
+ * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If
+ * 'pages' is more than that, you must iterate over the overall
+ * range.
  */
-#define TLBI_RANGE_MASK                        GENMASK_ULL(4, 0)
-#define __TLBI_RANGE_NUM(pages, scale) \
-       ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+#define __TLBI_RANGE_NUM(pages, scale)                                 \
+       ({                                                              \
+               int __pages = min((pages),                              \
+                                 __TLBI_RANGE_PAGES(31, (scale)));     \
+               (__pages >> (5 * (scale) + 1)) - 1;                     \
+       })
 
 /*
  *     TLB Invalidation
@@ -379,10 +385,6 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
  * 3. If there is 1 page remaining, flush it through non-range operations. Range
  *    operations can only span an even number of pages. We save this for last to
  *    ensure 64KB start alignment is maintained for the LPA2 case.
- *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
  */
 #define __flush_tlb_range_op(op, start, pages, stride,                 \
                                asid, tlb_level, tlbi_user, lpa2)       \
index 49a70f8c3cab22b758dd290a9fab2374a62abae9..b6aeb1f70e2a038ac2eb3bfe6c402bd37b4dcd6a 100644 (file)
                #size-cells = <2>;
                dma-coherent;
 
+               isa@18000000 {
+                       compatible = "isa";
+                       #size-cells = <1>;
+                       #address-cells = <2>;
+                       ranges = <1 0x0 0x0 0x18000000 0x4000>;
+               };
+
                liointc0: interrupt-controller@1fe01400 {
                        compatible = "loongson,liointc-2.0";
                        reg = <0x0 0x1fe01400 0x0 0x40>,
index dca91caf895e3cd9e428e75b91da9392bfb49d82..74b99bd234cc38df9a087915280e86ddb5bd56d4 100644 (file)
 
 &gmac0 {
        status = "okay";
+
+       phy-mode = "gmii";
+       phy-handle = <&phy0>;
+       mdio {
+               compatible = "snps,dwmac-mdio";
+               #address-cells = <1>;
+               #size-cells = <0>;
+               phy0: ethernet-phy@0 {
+                       reg = <2>;
+               };
+       };
 };
 
 &gmac1 {
        status = "okay";
+
+       phy-mode = "gmii";
+       phy-handle = <&phy1>;
+       mdio {
+               compatible = "snps,dwmac-mdio";
+               #address-cells = <1>;
+               #size-cells = <0>;
+               phy1: ethernet-phy@1 {
+                       reg = <2>;
+               };
+       };
 };
 
 &gmac2 {
        status = "okay";
+
+       phy-mode = "rgmii";
+       phy-handle = <&phy2>;
+       mdio {
+               compatible = "snps,dwmac-mdio";
+               #address-cells = <1>;
+               #size-cells = <0>;
+               phy2: ethernet-phy@2 {
+                       reg = <0>;
+               };
+       };
 };
index a231949b5f553a3814f48f6875e65ac2ed73d09a..9eab2d02cbe8bff12a26ce11dd7ac1543b7c1f82 100644 (file)
                #address-cells = <2>;
                #size-cells = <2>;
 
+               isa@18400000 {
+                       compatible = "isa";
+                       #size-cells = <1>;
+                       #address-cells = <2>;
+                       ranges = <1 0x0 0x0 0x18400000 0x4000>;
+               };
+
                pmc: power-management@100d0000 {
                        compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon";
                        reg = <0x0 0x100d0000 0x0 0x58>;
                msi: msi-controller@1fe01140 {
                        compatible = "loongson,pch-msi-1.0";
                        reg = <0x0 0x1fe01140 0x0 0x8>;
+                       interrupt-controller;
+                       #interrupt-cells = <1>;
                        msi-controller;
                        loongson,msi-base-vec = <64>;
                        loongson,msi-num-vecs = <192>;
                        #address-cells = <3>;
                        #size-cells = <2>;
                        device_type = "pci";
+                       msi-parent = <&msi>;
                        bus-range = <0x0 0xff>;
-                       ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>,
+                       ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>,
                                 <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>;
 
                        gmac0: ethernet@3,0 {
                                reg = <0x1800 0x0 0x0 0x0 0x0>;
-                               interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupts = <12 IRQ_TYPE_LEVEL_HIGH>,
+                                            <13 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupt-names = "macirq", "eth_lpi";
                                interrupt-parent = <&pic>;
                                status = "disabled";
                        };
 
                        gmac1: ethernet@3,1 {
                                reg = <0x1900 0x0 0x0 0x0 0x0>;
-                               interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
+                                            <15 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupt-names = "macirq", "eth_lpi";
                                interrupt-parent = <&pic>;
                                status = "disabled";
                        };
 
                        gmac2: ethernet@3,2 {
                                reg = <0x1a00 0x0 0x0 0x0 0x0>;
-                               interrupts = <17 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupts = <17 IRQ_TYPE_LEVEL_HIGH>,
+                                            <18 IRQ_TYPE_LEVEL_HIGH>;
+                               interrupt-names = "macirq", "eth_lpi";
                                interrupt-parent = <&pic>;
                                status = "disabled";
                        };
index b24437e28c6eda457b2be003b51ad3809600f7cc..7bd47d65bf7a048fda5183ed6844d5dbc129b232 100644 (file)
@@ -11,6 +11,7 @@
 #define _ASM_ADDRSPACE_H
 
 #include <linux/const.h>
+#include <linux/sizes.h>
 
 #include <asm/loongarch.h>
 
index 4a8adcca329b81e4f289dd7825fb15dbf2f4f7a9..c2f9979b2979e5e92e791e3f8304975db9e929c9 100644 (file)
 #include <asm/pgtable-bits.h>
 #include <asm/string.h>
 
-/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page)     ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
 extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size);
 extern void __init early_iounmap(void __iomem *addr, unsigned long size);
 
@@ -73,6 +68,21 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t
 
 #define __io_aw() mmiowb()
 
+#ifdef CONFIG_KFENCE
+#define virt_to_phys(kaddr)                                                            \
+({                                                                                     \
+       (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) :     \
+       page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\
+})
+
+#define phys_to_virt(paddr)                                                            \
+({                                                                                     \
+       extern char *__kfence_pool;                                                     \
+       (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) :                \
+       page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\
+})
+#endif
+
 #include <asm-generic/io.h>
 
 #define ARCH_HAS_VALID_PHYS_ADDR_RANGE
index 6c82aea1c99398c46484a77cc28da1316799affb..a6a5760da3a3323641e3fa422f3da87cdb4b66f8 100644 (file)
@@ -16,6 +16,7 @@
 static inline bool arch_kfence_init_pool(void)
 {
        int err;
+       char *kaddr, *vaddr;
        char *kfence_pool = __kfence_pool;
        struct vm_struct *area;
 
@@ -35,6 +36,14 @@ static inline bool arch_kfence_init_pool(void)
                return false;
        }
 
+       kaddr = kfence_pool;
+       vaddr = __kfence_pool;
+       while (kaddr < kfence_pool + KFENCE_POOL_SIZE) {
+               set_page_address(virt_to_page(kaddr), vaddr);
+               kaddr += PAGE_SIZE;
+               vaddr += PAGE_SIZE;
+       }
+
        return true;
 }
 
index 44027060c54a28bd34a80f538135491e3ebc758a..e85df33f11c77212c2e8ec8e6b3f1dbb955bc622 100644 (file)
@@ -78,7 +78,26 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 struct page *dmw_virt_to_page(unsigned long kaddr);
 struct page *tlb_virt_to_page(unsigned long kaddr);
 
-#define virt_to_pfn(kaddr)     PFN_DOWN(PHYSADDR(kaddr))
+#define pfn_to_phys(pfn)       __pfn_to_phys(pfn)
+#define phys_to_pfn(paddr)     __phys_to_pfn(paddr)
+
+#define page_to_phys(page)     pfn_to_phys(page_to_pfn(page))
+#define phys_to_page(paddr)    pfn_to_page(phys_to_pfn(paddr))
+
+#ifndef CONFIG_KFENCE
+
+#define page_to_virt(page)     __va(page_to_phys(page))
+#define virt_to_page(kaddr)    phys_to_page(__pa(kaddr))
+
+#else
+
+#define WANT_PAGE_VIRTUAL
+
+#define page_to_virt(page)                                                             \
+({                                                                                     \
+       extern char *__kfence_pool;                                                     \
+       (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page);        \
+})
 
 #define virt_to_page(kaddr)                                                            \
 ({                                                                                     \
@@ -86,6 +105,11 @@ struct page *tlb_virt_to_page(unsigned long kaddr);
        dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
 })
 
+#endif
+
+#define pfn_to_virt(pfn)       page_to_virt(pfn_to_page(pfn))
+#define virt_to_pfn(kaddr)     page_to_pfn(virt_to_page(kaddr))
+
 extern int __virt_addr_valid(volatile void *kaddr);
 #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
 
index a9630a81b38abbfc575ea4174af049ccd5a9a888..89af7c12e8c08d4faab2919cf22034b5ab0f5a6b 100644 (file)
@@ -4,6 +4,7 @@
  */
 #include <linux/export.h>
 #include <linux/io.h>
+#include <linux/kfence.h>
 #include <linux/memblock.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
@@ -111,6 +112,9 @@ int __virt_addr_valid(volatile void *kaddr)
 {
        unsigned long vaddr = (unsigned long)kaddr;
 
+       if (is_kfence_address((void *)kaddr))
+               return 1;
+
        if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base))
                return 0;
 
index 2aae72e638713a658475e6fb82fc73eae0fc3469..bda018150000e66b906420ea7e3a5f79472ca352 100644 (file)
 
 struct page *dmw_virt_to_page(unsigned long kaddr)
 {
-       return pfn_to_page(virt_to_pfn(kaddr));
+       return phys_to_page(__pa(kaddr));
 }
 EXPORT_SYMBOL(dmw_virt_to_page);
 
 struct page *tlb_virt_to_page(unsigned long kaddr)
 {
-       return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+       return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr))));
 }
 EXPORT_SYMBOL(tlb_virt_to_page);
 
index d14d0e37ad02ddf10b42cfed590c65f97f8de424..4a2b40ce39e0911d74806b2db54d69a9735d33ef 100644 (file)
@@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs);
 #define exception_ip(regs) exception_ip(regs)
 #define profile_pc(regs) instruction_pointer(regs)
 
-extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall);
+extern asmlinkage long syscall_trace_enter(struct pt_regs *regs);
 extern asmlinkage void syscall_trace_leave(struct pt_regs *regs);
 
 extern void die(const char *, struct pt_regs *) __noreturn;
index d1b11f66f748f06483edbc08e48d1b4e5e684156..cb1045ebab0621ad2c8c59eaebe96b13d47e4514 100644 (file)
@@ -101,6 +101,7 @@ void output_thread_info_defines(void)
        OFFSET(TI_CPU, thread_info, cpu);
        OFFSET(TI_PRE_COUNT, thread_info, preempt_count);
        OFFSET(TI_REGS, thread_info, regs);
+       OFFSET(TI_SYSCALL, thread_info, syscall);
        DEFINE(_THREAD_SIZE, THREAD_SIZE);
        DEFINE(_THREAD_MASK, THREAD_MASK);
        DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
index 59288c13b581b89ccb46214c7be02126a017dab2..61503a36067e9ef15c2ff7598256c6fd1de6ac8d 100644 (file)
@@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child, long request,
  * Notification of system call entry/exit
  * - triggered by current->work.syscall_trace
  */
-asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
+asmlinkage long syscall_trace_enter(struct pt_regs *regs)
 {
        user_exit();
 
-       current_thread_info()->syscall = syscall;
-
        if (test_thread_flag(TIF_SYSCALL_TRACE)) {
                if (ptrace_report_syscall_entry(regs))
                        return -1;
-               syscall = current_thread_info()->syscall;
        }
 
 #ifdef CONFIG_SECCOMP
@@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
                struct seccomp_data sd;
                unsigned long args[6];
 
-               sd.nr = syscall;
+               sd.nr = current_thread_info()->syscall;
                sd.arch = syscall_get_arch(current);
                syscall_get_arguments(current, regs, args);
                for (i = 0; i < 6; i++)
@@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
                ret = __secure_computing(&sd);
                if (ret == -1)
                        return ret;
-               syscall = current_thread_info()->syscall;
        }
 #endif
 
        if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
                trace_sys_enter(regs, regs->regs[2]);
 
-       audit_syscall_entry(syscall, regs->regs[4], regs->regs[5],
+       audit_syscall_entry(current_thread_info()->syscall,
+                           regs->regs[4], regs->regs[5],
                            regs->regs[6], regs->regs[7]);
 
        /*
         * Negative syscall numbers are mistaken for rejected syscalls, but
         * won't have had the return value set appropriately, so we do so now.
         */
-       if (syscall < 0)
+       if (current_thread_info()->syscall < 0)
                syscall_set_return_value(current, regs, -ENOSYS, 0);
-       return syscall;
+       return current_thread_info()->syscall;
 }
 
 /*
index 18dc9b34505614d2bc84767479a3e9972c1ba8ad..2c604717e63080b1c1949a080bfadf1cab94acd6 100644 (file)
@@ -77,6 +77,18 @@ loads_done:
        PTR_WD  load_a7, bad_stack_a7
        .previous
 
+       /*
+        * syscall number is in v0 unless we called syscall(__NR_###)
+        * where the real syscall number is in a0
+        */
+       subu    t2, v0,  __NR_O32_Linux
+       bnez    t2, 1f /* __NR_syscall at offset 0 */
+       LONG_S  a0, TI_SYSCALL($28)     # Save a0 as syscall number
+       b       2f
+1:
+       LONG_S  v0, TI_SYSCALL($28)     # Save v0 as syscall number
+2:
+
        lw      t0, TI_FLAGS($28)       # syscall tracing enabled?
        li      t1, _TIF_WORK_SYSCALL_ENTRY
        and     t0, t1
@@ -114,16 +126,7 @@ syscall_trace_entry:
        SAVE_STATIC
        move    a0, sp
 
-       /*
-        * syscall number is in v0 unless we called syscall(__NR_###)
-        * where the real syscall number is in a0
-        */
-       move    a1, v0
-       subu    t2, v0,  __NR_O32_Linux
-       bnez    t2, 1f /* __NR_syscall at offset 0 */
-       lw      a1, PT_R4(sp)
-
-1:     jal     syscall_trace_enter
+       jal     syscall_trace_enter
 
        bltz    v0, 1f                  # seccomp failed? Skip syscall
 
index 97456b2ca7dc32f13cac9a5843a3adea89735318..97788859238c344a64d1f75f2fdd6c2a4bc58006 100644 (file)
@@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp)
 
        sd      a3, PT_R26(sp)          # save a3 for syscall restarting
 
+       LONG_S  v0, TI_SYSCALL($28)     # Store syscall number
+
        li      t1, _TIF_WORK_SYSCALL_ENTRY
        LONG_L  t0, TI_FLAGS($28)       # syscall tracing enabled?
        and     t0, t1, t0
@@ -72,7 +74,6 @@ syscall_common:
 n32_syscall_trace_entry:
        SAVE_STATIC
        move    a0, sp
-       move    a1, v0
        jal     syscall_trace_enter
 
        bltz    v0, 1f                  # seccomp failed? Skip syscall
index e6264aa62e457f02b8a50df8b266a58b8361717d..be11ea5cc67e043c8a20fe0fecb4a0414b589ee9 100644 (file)
@@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp)
 
        sd      a3, PT_R26(sp)          # save a3 for syscall restarting
 
+       LONG_S  v0, TI_SYSCALL($28)     # Store syscall number
+
        li      t1, _TIF_WORK_SYSCALL_ENTRY
        LONG_L  t0, TI_FLAGS($28)       # syscall tracing enabled?
        and     t0, t1, t0
@@ -82,7 +84,6 @@ n64_syscall_exit:
 syscall_trace_entry:
        SAVE_STATIC
        move    a0, sp
-       move    a1, v0
        jal     syscall_trace_enter
 
        bltz    v0, 1f                  # seccomp failed? Skip syscall
index d3c2616cba22690bffd63b4521dc0f0ea7216315..7a5abb73e53127876af7e9d5f13dae2f8b08c3e8 100644 (file)
@@ -79,6 +79,22 @@ loads_done:
        PTR_WD  load_a7, bad_stack_a7
        .previous
 
+       /*
+        * absolute syscall number is in v0 unless we called syscall(__NR_###)
+        * where the real syscall number is in a0
+        * note: NR_syscall is the first O32 syscall but the macro is
+        * only defined when compiling with -mabi=32 (CONFIG_32BIT)
+        * therefore __NR_O32_Linux is used (4000)
+        */
+
+       subu    t2, v0,  __NR_O32_Linux
+       bnez    t2, 1f /* __NR_syscall at offset 0 */
+       LONG_S  a0, TI_SYSCALL($28)     # Save a0 as syscall number
+       b       2f
+1:
+       LONG_S  v0, TI_SYSCALL($28)     # Save v0 as syscall number
+2:
+
        li      t1, _TIF_WORK_SYSCALL_ENTRY
        LONG_L  t0, TI_FLAGS($28)       # syscall tracing enabled?
        and     t0, t1, t0
@@ -113,22 +129,7 @@ trace_a_syscall:
        sd      a7, PT_R11(sp)          # For indirect syscalls
 
        move    a0, sp
-       /*
-        * absolute syscall number is in v0 unless we called syscall(__NR_###)
-        * where the real syscall number is in a0
-        * note: NR_syscall is the first O32 syscall but the macro is
-        * only defined when compiling with -mabi=32 (CONFIG_32BIT)
-        * therefore __NR_O32_Linux is used (4000)
-        */
-       .set    push
-       .set    reorder
-       subu    t1, v0,  __NR_O32_Linux
-       move    a1, v0
-       bnez    t1, 1f /* __NR_syscall at offset 0 */
-       ld      a1, PT_R4(sp) /* Arg1 for __NR_syscall case */
-       .set    pop
-
-1:     jal     syscall_trace_enter
+       jal     syscall_trace_enter
 
        bltz    v0, 1f                  # seccomp failed? Skip syscall
 
index 4fff6ed46e902cfbe723cf5ed5ce517e2d131891..4474bf32d0a4970daec7fad3f12f8aa4a9e43871 100644 (file)
@@ -2633,6 +2633,16 @@ config MITIGATION_RFDS
          stored in floating point, vector and integer registers.
          See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst>
 
+config MITIGATION_SPECTRE_BHI
+       bool "Mitigate Spectre-BHB (Branch History Injection)"
+       depends on CPU_SUP_INTEL
+       default y
+       help
+         Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks
+         where the branch history buffer is poisoned to speculatively steer
+         indirect branches.
+         See <file:Documentation/admin-guide/hw-vuln/spectre.rst>
+
 endif
 
 config ARCH_HAS_ADD_PAGES
index 6356060caaf311af8370ccaeb69aab85847b62d1..6de50b80702e61087c432faf580a0ab063d22507 100644 (file)
@@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr)
 
        if (likely(unr < NR_syscalls)) {
                unr = array_index_nospec(unr, NR_syscalls);
-               regs->ax = sys_call_table[unr](regs);
+               regs->ax = x64_sys_call(regs, unr);
                return true;
        }
        return false;
@@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 
        if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) {
                xnr = array_index_nospec(xnr, X32_NR_syscalls);
-               regs->ax = x32_sys_call_table[xnr](regs);
+               regs->ax = x32_sys_call(regs, xnr);
                return true;
        }
        return false;
@@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr)
 
        if (likely(unr < IA32_NR_syscalls)) {
                unr = array_index_nospec(unr, IA32_NR_syscalls);
-               regs->ax = ia32_sys_call_table[unr](regs);
+               regs->ax = ia32_sys_call(regs, unr);
        } else if (nr != -1) {
                regs->ax = __ia32_sys_ni_syscall(regs);
        }
@@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void)
 }
 
 /**
- * int80_emulation - 32-bit legacy syscall entry
+ * do_int80_emulation - 32-bit legacy syscall C entry from asm
  *
  * This entry point can be used by 32-bit and 64-bit programs to perform
  * 32-bit system calls.  Instances of INT $0x80 can be found inline in
@@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void)
  *   eax:                              system call number
  *   ebx, ecx, edx, esi, edi, ebp:     arg1 - arg 6
  */
-DEFINE_IDTENTRY_RAW(int80_emulation)
+__visible noinstr void do_int80_emulation(struct pt_regs *regs)
 {
        int nr;
 
index 8af2a26b24f6a9783f9bb348cd67c15e1c3799c8..1b5be07f86698a3b634a0d83b6578775781d1739 100644 (file)
@@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
        /* clobbers %rax, make sure it is after saving the syscall nr */
        IBRS_ENTER
        UNTRAIN_RET
+       CLEAR_BRANCH_HISTORY
 
        call    do_syscall_64           /* returns with IRQs disabled */
 
@@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
        call    make_task_dead
 SYM_CODE_END(rewind_stack_and_make_dead)
 .popsection
+
+/*
+ * This sequence executes branches in order to remove user branch information
+ * from the branch history tracker in the Branch Predictor, therefore removing
+ * user influence on subsequent BTB lookups.
+ *
+ * It should be used on parts prior to Alder Lake. Newer parts should use the
+ * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being
+ * virtualized on newer hardware the VMM should protect against BHI attacks by
+ * setting BHI_DIS_S for the guests.
+ *
+ * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging
+ * and not clearing the branch history. The call tree looks like:
+ *
+ * call 1
+ *    call 2
+ *      call 2
+ *        call 2
+ *          call 2
+ *           call 2
+ *           ret
+ *         ret
+ *        ret
+ *      ret
+ *    ret
+ * ret
+ *
+ * This means that the stack is non-constant and ORC can't unwind it with %rsp
+ * alone.  Therefore we unconditionally set up the frame pointer, which allows
+ * ORC to unwind properly.
+ *
+ * The alignment is for performance and not for safety, and may be safely
+ * refactored in the future if needed.
+ */
+SYM_FUNC_START(clear_bhb_loop)
+       push    %rbp
+       mov     %rsp, %rbp
+       movl    $5, %ecx
+       ANNOTATE_INTRA_FUNCTION_CALL
+       call    1f
+       jmp     5f
+       .align 64, 0xcc
+       ANNOTATE_INTRA_FUNCTION_CALL
+1:     call    2f
+       RET
+       .align 64, 0xcc
+2:     movl    $5, %eax
+3:     jmp     4f
+       nop
+4:     sub     $1, %eax
+       jnz     3b
+       sub     $1, %ecx
+       jnz     1b
+       RET
+5:     lfence
+       pop     %rbp
+       RET
+SYM_FUNC_END(clear_bhb_loop)
+EXPORT_SYMBOL_GPL(clear_bhb_loop)
+STACK_FRAME_NON_STANDARD(clear_bhb_loop)
index eabf48c4d4b4c30367792f5d9a0b158a9ecf8a04..c779046cc3fe792658a984648328000535812dea 100644 (file)
@@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
 
        IBRS_ENTER
        UNTRAIN_RET
+       CLEAR_BRANCH_HISTORY
 
        /*
         * SYSENTER doesn't filter flags, so we need to clear NT and AC
@@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
 
        IBRS_ENTER
        UNTRAIN_RET
+       CLEAR_BRANCH_HISTORY
 
        movq    %rsp, %rdi
        call    do_fast_syscall_32
@@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
        ANNOTATE_NOENDBR
        int3
 SYM_CODE_END(entry_SYSCALL_compat)
+
+/*
+ * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries
+ * point to C routines, however since this is a system call interface the branch
+ * history needs to be scrubbed to protect against BHI attacks, and that
+ * scrubbing needs to take place in assembly code prior to entering any C
+ * routines.
+ */
+SYM_CODE_START(int80_emulation)
+       ANNOTATE_NOENDBR
+       UNWIND_HINT_FUNC
+       CLEAR_BRANCH_HISTORY
+       jmp do_int80_emulation
+SYM_CODE_END(int80_emulation)
index 8cfc9bc73e7f8b21f748367256a78df3dc5e5b4a..c2235bae17ef665098342c323a24e4b388c169cb 100644 (file)
 #include <asm/syscalls_32.h>
 #undef __SYSCALL
 
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
+#ifdef CONFIG_X86_32
 #define __SYSCALL(nr, sym) __ia32_##sym,
-
-__visible const sys_call_ptr_t ia32_sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_32.h>
 };
+#undef __SYSCALL
+#endif
+
+#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs);
+
+long ia32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+       switch (nr) {
+       #include <asm/syscalls_32.h>
+       default: return __ia32_sys_ni_syscall(regs);
+       }
+};
index be120eec1fc9f95c69c23074bcd3fbc355b90d47..33b3f09e6f151e11faca1c9d13f0eb4917f3392b 100644 (file)
 #include <asm/syscalls_64.h>
 #undef __SYSCALL
 
+/*
+ * The sys_call_table[] is no longer used for system calls, but
+ * kernel/trace/trace_syscalls.c still wants to know the system
+ * call address.
+ */
 #define __SYSCALL(nr, sym) __x64_##sym,
-
-asmlinkage const sys_call_ptr_t sys_call_table[] = {
+const sys_call_ptr_t sys_call_table[] = {
 #include <asm/syscalls_64.h>
 };
+#undef __SYSCALL
+
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
+
+long x64_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+       switch (nr) {
+       #include <asm/syscalls_64.h>
+       default: return __x64_sys_ni_syscall(regs);
+       }
+};
index bdd0e03a1265d23e474c5c45e1bd64e7b14b7b79..03de4a93213182c6fa5809b077a54ea51be411ea 100644 (file)
 #include <asm/syscalls_x32.h>
 #undef __SYSCALL
 
-#define __SYSCALL(nr, sym) __x64_##sym,
+#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs);
 
-asmlinkage const sys_call_ptr_t x32_sys_call_table[] = {
-#include <asm/syscalls_x32.h>
+long x32_sys_call(const struct pt_regs *regs, unsigned int nr)
+{
+       switch (nr) {
+       #include <asm/syscalls_x32.h>
+       default: return __x64_sys_ni_syscall(regs);
+       }
 };
index 09050641ce5d3c02ad099d8faabbe5e98fe57570..5b0dd07b1ef19e915c1553eb13ca1c20ef1814ff 100644 (file)
@@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
        while (++i < cpuc->n_events) {
                cpuc->event_list[i-1] = cpuc->event_list[i];
                cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
+               cpuc->assign[i-1] = cpuc->assign[i];
        }
        cpuc->event_constraint[i-1] = NULL;
        --cpuc->n_events;
index 5fc45543e95502cf16607e69e891c6e282136b30..0569f579338b516b22fe447248ae1ae4e4880a03 100644 (file)
@@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu)
  * IPI implementation on Hyper-V.
  */
 static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
-               bool exclude_self)
+                              bool exclude_self)
 {
        struct hv_send_ipi_ex *ipi_arg;
        unsigned long flags;
@@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
        if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) {
                ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
 
-               nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask,
-                               exclude_self ? cpu_is_self : NULL);
+               nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask,
+                                               exclude_self ? cpu_is_self : NULL);
 
                /*
                 * 'nr_bank <= 0' means some CPUs in cpumask can't be
@@ -147,7 +147,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
        }
 
        status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank,
-                             ipi_arg, NULL);
+                                    ipi_arg, NULL);
 
 ipi_mask_ex_done:
        local_irq_restore(flags);
@@ -155,7 +155,7 @@ ipi_mask_ex_done:
 }
 
 static bool __send_ipi_mask(const struct cpumask *mask, int vector,
-               bool exclude_self)
+                           bool exclude_self)
 {
        int cur_cpu, vcpu, this_cpu = smp_processor_id();
        struct hv_send_ipi ipi_arg;
@@ -181,7 +181,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
                        return false;
        }
 
-       if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+       if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
                return false;
 
        /*
@@ -218,7 +218,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector,
        }
 
        status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector,
-                                    ipi_arg.cpu_mask);
+                                       ipi_arg.cpu_mask);
        return hv_result_success(status);
 
 do_ex_hypercall:
@@ -241,7 +241,7 @@ static bool __send_ipi_one(int cpu, int vector)
                        return false;
        }
 
-       if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+       if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR)
                return false;
 
        if (vp >= 64)
index 68a0843d4750f765b50dd303c82bc445f442646e..3fa1f2ee7b0d0630df03675bddfdad0c40ad411d 100644 (file)
@@ -3,7 +3,6 @@
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
 #include <linux/clockchips.h>
-#include <linux/acpi.h>
 #include <linux/hyperv.h>
 #include <linux/slab.h>
 #include <linux/cpuhotplug.h>
@@ -116,12 +115,11 @@ free_buf:
 
 int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
 {
-       struct hv_add_logical_processor_in *input;
-       struct hv_add_logical_processor_out *output;
+       struct hv_input_add_logical_processor *input;
+       struct hv_output_add_logical_processor *output;
        u64 status;
        unsigned long flags;
        int ret = HV_STATUS_SUCCESS;
-       int pxm = node_to_pxm(node);
 
        /*
         * When adding a logical processor, the hypervisor may return
@@ -137,11 +135,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
 
                input->lp_index = lp_index;
                input->apic_id = apic_id;
-               input->flags = 0;
-               input->proximity_domain_info.domain_id = pxm;
-               input->proximity_domain_info.flags.reserved = 0;
-               input->proximity_domain_info.flags.proximity_info_valid = 1;
-               input->proximity_domain_info.flags.proximity_preferred = 1;
+               input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
                status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
                                         input, output);
                local_irq_restore(flags);
@@ -166,7 +160,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
        u64 status;
        unsigned long irq_flags;
        int ret = HV_STATUS_SUCCESS;
-       int pxm = node_to_pxm(node);
 
        /* Root VPs don't seem to need pages deposited */
        if (partition_id != hv_current_partition_id) {
@@ -185,14 +178,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
                input->vp_index = vp_index;
                input->flags = flags;
                input->subnode_type = HvSubnodeAny;
-               if (node != NUMA_NO_NODE) {
-                       input->proximity_domain_info.domain_id = pxm;
-                       input->proximity_domain_info.flags.reserved = 0;
-                       input->proximity_domain_info.flags.proximity_info_valid = 1;
-                       input->proximity_domain_info.flags.proximity_preferred = 1;
-               } else {
-                       input->proximity_domain_info.as_uint64 = 0;
-               }
+               input->proximity_domain_info = hv_numa_node_to_pxm_info(node);
                status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
                local_irq_restore(irq_flags);
 
index 94ce0f7c9d3a26cd2b766a60042a0b941b3fe0d2..e6ab0cf15ed573b3acfd5fce79bc20cfce7c493a 100644 (file)
@@ -13,6 +13,7 @@
 #include <asm/mpspec.h>
 #include <asm/msr.h>
 #include <asm/hardirq.h>
+#include <asm/io.h>
 
 #define ARCH_APICTIMER_STOPS_ON_C3     1
 
@@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
 
 static inline u32 native_apic_mem_read(u32 reg)
 {
-       return *((volatile u32 *)(APIC_BASE + reg));
+       return readl((void __iomem *)(APIC_BASE + reg));
 }
 
 static inline void native_apic_mem_eoi(void)
index a38f8f9ba65729125234814c08547498e4e3b8bc..3c7434329661c66e7c34283f0a3f2c59a87f8044 100644 (file)
 
 /*
  * Extended auxiliary flags: Linux defined - for features scattered in various
- * CPUID levels like 0x80000022, etc.
+ * CPUID levels like 0x80000022, etc and Linux defined features.
  *
  * Reuse free bits when adding new feature flags!
  */
 #define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP     (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL           (21*32+ 2) /* "" BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW       (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
 
 /*
  * BUG word(s)
 #define X86_BUG_SRSO                   X86_BUG(1*32 + 0) /* AMD SRSO bug */
 #define X86_BUG_DIV0                   X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
 #define X86_BUG_RFDS                   X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI                    X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
 #endif /* _ASM_X86_CPUFEATURES_H */
index 05956bd8bacf50e35f463c13720a38735fe8b1b5..e72c2b87295799af9d44eb84f59d095f4f90acfd 100644 (file)
 #define SPEC_CTRL_SSBD                 BIT(SPEC_CTRL_SSBD_SHIFT)       /* Speculative Store Bypass Disable */
 #define SPEC_CTRL_RRSBA_DIS_S_SHIFT    6          /* Disable RRSBA behavior */
 #define SPEC_CTRL_RRSBA_DIS_S          BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
+#define SPEC_CTRL_BHI_DIS_S_SHIFT      10         /* Disable Branch History Injection behavior */
+#define SPEC_CTRL_BHI_DIS_S            BIT(SPEC_CTRL_BHI_DIS_S_SHIFT)
 
 /* A mask for bits which the kernel toggles when controlling mitigations */
 #define SPEC_CTRL_MITIGATIONS_MASK     (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \
-                                                       | SPEC_CTRL_RRSBA_DIS_S)
+                                                       | SPEC_CTRL_RRSBA_DIS_S \
+                                                       | SPEC_CTRL_BHI_DIS_S)
 
 #define MSR_IA32_PRED_CMD              0x00000049 /* Prediction Command */
 #define PRED_CMD_IBPB                  BIT(0)     /* Indirect Branch Prediction Barrier */
                                                 * are restricted to targets in
                                                 * kernel.
                                                 */
+#define ARCH_CAP_BHI_NO                        BIT(20) /*
+                                                * CPU is not affected by Branch
+                                                * History Injection.
+                                                */
 #define ARCH_CAP_PBRSB_NO              BIT(24) /*
                                                 * Not susceptible to Post-Barrier
                                                 * Return Stack Buffer Predictions.
index 170c89ed22fcd3a27106d166a9e7f5a5d1fadf80..ff5f1ecc7d1e6512fcc34f4a6e5df5976e9087f0 100644 (file)
        ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
 .endm
 
+#ifdef CONFIG_X86_64
+.macro CLEAR_BRANCH_HISTORY
+       ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP
+.endm
+
+.macro CLEAR_BRANCH_HISTORY_VMEXIT
+       ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT
+.endm
+#else
+#define CLEAR_BRANCH_HISTORY
+#define CLEAR_BRANCH_HISTORY_VMEXIT
+#endif
+
 #else /* __ASSEMBLY__ */
 
 #define ANNOTATE_RETPOLINE_SAFE                                        \
@@ -368,6 +381,10 @@ extern void srso_alias_return_thunk(void);
 extern void entry_untrain_ret(void);
 extern void entry_ibpb(void);
 
+#ifdef CONFIG_X86_64
+extern void clear_bhb_loop(void);
+#endif
+
 extern void (*x86_return_thunk)(void);
 
 extern void __warn_thunk(void);
index f44e2f9ab65d779f35bac9c5e58dd8b694778efc..2fc7bc3863ff6f7a932ac2ee05682a2ba71f3308 100644 (file)
 #include <asm/thread_info.h>   /* for TS_COMPAT */
 #include <asm/unistd.h>
 
+/* This is used purely for kernel/trace/trace_syscalls.c */
 typedef long (*sys_call_ptr_t)(const struct pt_regs *);
 extern const sys_call_ptr_t sys_call_table[];
 
-#if defined(CONFIG_X86_32)
-#define ia32_sys_call_table sys_call_table
-#else
 /*
  * These may not exist, but still put the prototypes in so we
  * can use IS_ENABLED().
  */
-extern const sys_call_ptr_t ia32_sys_call_table[];
-extern const sys_call_ptr_t x32_sys_call_table[];
-#endif
+extern long ia32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x32_sys_call(const struct pt_regs *, unsigned int nr);
+extern long x64_sys_call(const struct pt_regs *, unsigned int nr);
 
 /*
  * Only the low 32 bits of orig_ax are meaningful, so we return int.
@@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task)
 }
 
 bool do_syscall_64(struct pt_regs *regs, int nr);
+void do_int80_emulation(struct pt_regs *regs);
 
 #endif /* CONFIG_X86_32 */
 
index a42d8a6f7149588bc74213268733003bf7ccf470..c342c4aa9c6848c607238dad1ff07105737d5873 100644 (file)
@@ -1687,11 +1687,11 @@ static int x2apic_state;
 
 static bool x2apic_hw_locked(void)
 {
-       u64 ia32_cap;
+       u64 x86_arch_cap_msr;
        u64 msr;
 
-       ia32_cap = x86_read_arch_cap_msr();
-       if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) {
+       x86_arch_cap_msr = x86_read_arch_cap_msr();
+       if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) {
                rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr);
                return (msr & LEGACY_XAPIC_DISABLED);
        }
index 9bf17c9c29dad2e3f3c38c07253accc667cadea3..cb9eece55904d049edc600960bdaa0db58765459 100644 (file)
@@ -535,7 +535,6 @@ clear_sev:
 
 static void early_init_amd(struct cpuinfo_x86 *c)
 {
-       u64 value;
        u32 dummy;
 
        if (c->x86 >= 0xf)
@@ -603,20 +602,6 @@ static void early_init_amd(struct cpuinfo_x86 *c)
 
        early_detect_mem_encrypt(c);
 
-       /* Re-enable TopologyExtensions if switched off by BIOS */
-       if (c->x86 == 0x15 &&
-           (c->x86_model >= 0x10 && c->x86_model <= 0x6f) &&
-           !cpu_has(c, X86_FEATURE_TOPOEXT)) {
-
-               if (msr_set_bit(0xc0011005, 54) > 0) {
-                       rdmsrl(0xc0011005, value);
-                       if (value & BIT_64(54)) {
-                               set_cpu_cap(c, X86_FEATURE_TOPOEXT);
-                               pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
-                       }
-               }
-       }
-
        if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) {
                if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB))
                        setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
index e7ba936d798b8198f5837118d5bb33d40389ccc7..ca295b0c1eeee05b812c27bb88bd814dba3c1f00 100644 (file)
@@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current);
 u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
 EXPORT_SYMBOL_GPL(x86_pred_cmd);
 
+static u64 __ro_after_init x86_arch_cap_msr;
+
 static DEFINE_MUTEX(spec_ctrl_mutex);
 
 void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk;
@@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void)
                x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK;
        }
 
+       x86_arch_cap_msr = x86_read_arch_cap_msr();
+
        /* Select the proper CPU mitigations before patching alternatives: */
        spectre_v1_select_mitigation();
        spectre_v2_select_mitigation();
@@ -301,8 +305,6 @@ static const char * const taa_strings[] = {
 
 static void __init taa_select_mitigation(void)
 {
-       u64 ia32_cap;
-
        if (!boot_cpu_has_bug(X86_BUG_TAA)) {
                taa_mitigation = TAA_MITIGATION_OFF;
                return;
@@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void)
         * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode
         * update is required.
         */
-       ia32_cap = x86_read_arch_cap_msr();
-       if ( (ia32_cap & ARCH_CAP_MDS_NO) &&
-           !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR))
+       if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) &&
+           !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))
                taa_mitigation = TAA_MITIGATION_UCODE_NEEDED;
 
        /*
@@ -401,8 +402,6 @@ static const char * const mmio_strings[] = {
 
 static void __init mmio_select_mitigation(void)
 {
-       u64 ia32_cap;
-
        if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
             boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
             cpu_mitigations_off()) {
@@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void)
        if (mmio_mitigation == MMIO_MITIGATION_OFF)
                return;
 
-       ia32_cap = x86_read_arch_cap_msr();
-
        /*
         * Enable CPU buffer clear mitigation for host and VMM, if also affected
         * by MDS or TAA. Otherwise, enable mitigation for VMM only.
@@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void)
         * be propagated to uncore buffers, clearing the Fill buffers on idle
         * is required irrespective of SMT state.
         */
-       if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+       if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
                static_branch_enable(&mds_idle_clear);
 
        /*
@@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void)
         * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
         * affected systems.
         */
-       if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+       if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
            (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
             boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
-            !(ia32_cap & ARCH_CAP_MDS_NO)))
+            !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
                mmio_mitigation = MMIO_MITIGATION_VERW;
        else
                mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
@@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void)
        if (rfds_mitigation == RFDS_MITIGATION_OFF)
                return;
 
-       if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR)
+       if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
                setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
        else
                rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED;
@@ -659,8 +656,6 @@ void update_srbds_msr(void)
 
 static void __init srbds_select_mitigation(void)
 {
-       u64 ia32_cap;
-
        if (!boot_cpu_has_bug(X86_BUG_SRBDS))
                return;
 
@@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void)
         * are only exposed to SRBDS when TSX is enabled or when CPU is affected
         * by Processor MMIO Stale Data vulnerability.
         */
-       ia32_cap = x86_read_arch_cap_msr();
-       if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+       if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
            !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
                srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
        else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
@@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void)
        /* Will verify below that mitigation _can_ be disabled */
 
        /* No microcode */
-       if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+       if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) {
                if (gds_mitigation == GDS_MITIGATION_FORCE) {
                        /*
                         * This only needs to be done on the boot CPU so do it
@@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void)
        return SPECTRE_V2_RETPOLINE;
 }
 
+static bool __ro_after_init rrsba_disabled;
+
 /* Disable in-kernel use of non-RSB RET predictors */
 static void __init spec_ctrl_disable_kernel_rrsba(void)
 {
-       u64 ia32_cap;
+       /* Already recorded as disabled by an earlier call */
+       if (rrsba_disabled)
+               return;
 
-       if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+       /* ARCH_CAP_RRSBA is clear: record the state, SPEC_CTRL is left alone */
+       if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) {
+               rrsba_disabled = true;
                return;
+       }
 
-       ia32_cap = x86_read_arch_cap_msr();
+       /* No RRSBA_CTRL feature: rrsba_disabled is not set on this path */
+       if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+               return;
 
-       if (ia32_cap & ARCH_CAP_RRSBA) {
-               x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
-               update_spec_ctrl(x86_spec_ctrl_base);
-       }
+       /* Set RRSBA_DIS_S in the base value, write it out and record it */
+       x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+       update_spec_ctrl(x86_spec_ctrl_base);
+       rrsba_disabled = true;
 }
 
 static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode)
@@ -1607,6 +1606,73 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_
        dump_stack();
 }
 
+/*
+ * Set BHI_DIS_S to prevent indirect branches in the kernel from being
+ * influenced by branch history in userspace. Not needed if BHI_NO is set.
+ *
+ * Returns false without touching x86_spec_ctrl_base when the CPU lacks
+ * X86_FEATURE_BHI_CTRL. Otherwise sets SPEC_CTRL_BHI_DIS_S in
+ * x86_spec_ctrl_base, passes it to update_spec_ctrl(), forces
+ * X86_FEATURE_CLEAR_BHB_HW and returns true.
+ */
+static bool __init spec_ctrl_bhi_dis(void)
+{
+       if (!boot_cpu_has(X86_FEATURE_BHI_CTRL))
+               return false;
+
+       x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S;
+       update_spec_ctrl(x86_spec_ctrl_base);
+       setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW);
+
+       return true;
+}
+
+/* Requested Spectre BHI mitigation state, selectable via "spectre_bhi=". */
+enum bhi_mitigations {
+       BHI_MITIGATION_OFF,
+       BHI_MITIGATION_ON,
+};
+
+/* Build-time default follows CONFIG_MITIGATION_SPECTRE_BHI. */
+static enum bhi_mitigations bhi_mitigation __ro_after_init =
+       IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF;
+
+/*
+ * Parse the "spectre_bhi=" early parameter.
+ *
+ * "off" and "on" select the corresponding bhi_mitigation value. Any other
+ * value is reported with pr_err() and ignored, leaving the current setting
+ * unchanged. The message is newline-terminated so that it is emitted as a
+ * complete log line.
+ *
+ * Returns -EINVAL when no value was supplied, 0 otherwise.
+ */
+static int __init spectre_bhi_parse_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (!strcmp(str, "off"))
+               bhi_mitigation = BHI_MITIGATION_OFF;
+       else if (!strcmp(str, "on"))
+               bhi_mitigation = BHI_MITIGATION_ON;
+       else
+               pr_err("Ignoring unknown spectre_bhi option (%s)\n", str);
+
+       return 0;
+}
+early_param("spectre_bhi", spectre_bhi_parse_cmdline);
+
+/*
+ * Pick the Spectre BHI mitigation. The steps are tried in order and the
+ * function returns at the first one that applies:
+ *  - nothing, when bhi_mitigation is BHI_MITIGATION_OFF;
+ *  - retpoline, when X86_FEATURE_RETPOLINE is enabled and rrsba_disabled is
+ *    set after calling spec_ctrl_disable_kernel_rrsba();
+ *  - BHI_DIS_S, when spec_ctrl_bhi_dis() returns true;
+ *  - nothing further unless CONFIG_X86_64 is enabled;
+ *  - otherwise force both X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT and
+ *    X86_FEATURE_CLEAR_BHB_LOOP and log each choice.
+ */
+static void __init bhi_select_mitigation(void)
+{
+       if (bhi_mitigation == BHI_MITIGATION_OFF)
+               return;
+
+       /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */
+       if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+               spec_ctrl_disable_kernel_rrsba();
+               if (rrsba_disabled)
+                       return;
+       }
+
+       if (spec_ctrl_bhi_dis())
+               return;
+
+       if (!IS_ENABLED(CONFIG_X86_64))
+               return;
+
+       /* Mitigate KVM by default */
+       setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT);
+       pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n");
+
+       /* Mitigate syscalls when the mitigation is forced =on */
+       setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP);
+       pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n");
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
        enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -1718,6 +1784,9 @@ static void __init spectre_v2_select_mitigation(void)
            mode == SPECTRE_V2_RETPOLINE)
                spec_ctrl_disable_kernel_rrsba();
 
+       if (boot_cpu_has(X86_BUG_BHI))
+               bhi_select_mitigation();
+
        spectre_v2_enabled = mode;
        pr_info("%s\n", spectre_v2_strings[mode]);
 
@@ -1832,8 +1901,6 @@ static void update_indir_branch_cond(void)
 /* Update the static key controlling the MDS CPU buffer clear in idle */
 static void update_mds_branch_idle(void)
 {
-       u64 ia32_cap = x86_read_arch_cap_msr();
-
        /*
         * Enable the idle clearing if SMT is active on CPUs which are
         * affected only by MSBDS and not any other MDS variant.
@@ -1848,7 +1915,7 @@ static void update_mds_branch_idle(void)
        if (sched_smt_active()) {
                static_branch_enable(&mds_idle_clear);
        } else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
-                  (ia32_cap & ARCH_CAP_FBSDP_NO)) {
+                  (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) {
                static_branch_disable(&mds_idle_clear);
        }
 }
@@ -2695,15 +2762,15 @@ static char *stibp_state(void)
 
        switch (spectre_v2_user_stibp) {
        case SPECTRE_V2_USER_NONE:
-               return ", STIBP: disabled";
+               return "; STIBP: disabled";
        case SPECTRE_V2_USER_STRICT:
-               return ", STIBP: forced";
+               return "; STIBP: forced";
        case SPECTRE_V2_USER_STRICT_PREFERRED:
-               return ", STIBP: always-on";
+               return "; STIBP: always-on";
        case SPECTRE_V2_USER_PRCTL:
        case SPECTRE_V2_USER_SECCOMP:
                if (static_key_enabled(&switch_to_cond_stibp))
-                       return ", STIBP: conditional";
+                       return "; STIBP: conditional";
        }
        return "";
 }
@@ -2712,10 +2779,10 @@ static char *ibpb_state(void)
 {
        if (boot_cpu_has(X86_FEATURE_IBPB)) {
                if (static_key_enabled(&switch_mm_always_ibpb))
-                       return ", IBPB: always-on";
+                       return "; IBPB: always-on";
                if (static_key_enabled(&switch_mm_cond_ibpb))
-                       return ", IBPB: conditional";
-               return ", IBPB: disabled";
+                       return "; IBPB: conditional";
+               return "; IBPB: disabled";
        }
        return "";
 }
@@ -2725,14 +2792,30 @@ static char *pbrsb_eibrs_state(void)
        if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) {
                if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) ||
                    boot_cpu_has(X86_FEATURE_RSB_VMEXIT))
-                       return ", PBRSB-eIBRS: SW sequence";
+                       return "; PBRSB-eIBRS: SW sequence";
                else
-                       return ", PBRSB-eIBRS: Vulnerable";
+                       return "; PBRSB-eIBRS: Vulnerable";
        } else {
-               return ", PBRSB-eIBRS: Not affected";
+               return "; PBRSB-eIBRS: Not affected";
        }
 }
 
+/*
+ * Return the "; BHI: ..." fragment appended to the spectre_v2 state string.
+ * The checks are ordered; the first one that matches decides the result.
+ */
+static const char *spectre_bhi_state(void)
+{
+       if (!boot_cpu_has_bug(X86_BUG_BHI))
+               return "; BHI: Not affected";
+
+       if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW))
+               return "; BHI: BHI_DIS_S";
+
+       if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP))
+               return "; BHI: SW loop, KVM: SW loop";
+
+       if (rrsba_disabled && boot_cpu_has(X86_FEATURE_RETPOLINE))
+               return "; BHI: Retpoline";
+
+       if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT))
+               return "; BHI: Vulnerable, KVM: SW loop";
+
+       return "; BHI: Vulnerable";
+}
+
 static ssize_t spectre_v2_show_state(char *buf)
 {
        if (spectre_v2_enabled == SPECTRE_V2_LFENCE)
@@ -2745,13 +2828,15 @@ static ssize_t spectre_v2_show_state(char *buf)
            spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
                return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
 
-       return sysfs_emit(buf, "%s%s%s%s%s%s%s\n",
+       return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
                          spectre_v2_strings[spectre_v2_enabled],
                          ibpb_state(),
-                         boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+                         boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
                          stibp_state(),
-                         boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "",
+                         boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
                          pbrsb_eibrs_state(),
+                         spectre_bhi_state(),
+                         /* this should always be at the end */
                          spectre_v2_module_string());
 }
 
index 5c1e6d6be267af3e7b489e9f71937e7be6b25448..605c26c009c8ac61c8560231ea6b35d2381ff2aa 100644 (file)
@@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
 #define NO_SPECTRE_V2          BIT(8)
 #define NO_MMIO                        BIT(9)
 #define NO_EIBRS_PBRSB         BIT(10)
+#define NO_BHI                 BIT(11)
 
 #define VULNWL(vendor, family, model, whitelist)       \
        X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist)
@@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
        VULNWL_INTEL(ATOM_TREMONT_D,            NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB),
 
        /* AMD Family 0xf - 0x12 */
-       VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
-       VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
-       VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
-       VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO),
+       VULNWL_AMD(0x0f,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+       VULNWL_AMD(0x10,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+       VULNWL_AMD(0x11,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
+       VULNWL_AMD(0x12,        NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI),
 
        /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */
-       VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
-       VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB),
+       VULNWL_AMD(X86_FAMILY_ANY,      NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
+       VULNWL_HYGON(X86_FAMILY_ANY,    NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI),
 
        /* Zhaoxin Family 7 */
-       VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
-       VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO),
+       VULNWL(CENTAUR, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
+       VULNWL(ZHAOXIN, 7, X86_MODEL_ANY,       NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI),
        {}
 };
 
@@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi
 
+/*
+ * Return the value read from MSR_IA32_ARCH_CAPABILITIES, or 0 when
+ * X86_FEATURE_ARCH_CAPABILITIES is not set on the boot CPU.
+ */
 u64 x86_read_arch_cap_msr(void)
 {
-       u64 ia32_cap = 0;
+       u64 x86_arch_cap_msr = 0;
 
        if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
-               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+               rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr);
 
-       return ia32_cap;
+       return x86_arch_cap_msr;
 }
 
+/*
+ * True only when ARCH_CAP_FBSDP_NO, ARCH_CAP_PSDP_NO and
+ * ARCH_CAP_SBDR_SSDP_NO are all set in the given ARCH_CAPABILITIES value.
+ */
-static bool arch_cap_mmio_immune(u64 ia32_cap)
+static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr)
 {
-       return (ia32_cap & ARCH_CAP_FBSDP_NO &&
-               ia32_cap & ARCH_CAP_PSDP_NO &&
-               ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+       return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO &&
+               x86_arch_cap_msr & ARCH_CAP_PSDP_NO &&
+               x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO);
 }
 
-static bool __init vulnerable_to_rfds(u64 ia32_cap)
+static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr)
 {
        /* The "immunity" bit trumps everything else: */
-       if (ia32_cap & ARCH_CAP_RFDS_NO)
+       if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO)
                return false;
 
        /*
@@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
         * indicate that mitigation is needed because guest is running on a
         * vulnerable hardware or may migrate to such hardware:
         */
-       if (ia32_cap & ARCH_CAP_RFDS_CLEAR)
+       if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR)
                return true;
 
        /* Only consult the blacklist when there is no enumeration: */
@@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap)
 
 static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
 {
-       u64 ia32_cap = x86_read_arch_cap_msr();
+       u64 x86_arch_cap_msr = x86_read_arch_cap_msr();
 
        /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */
        if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) &&
-           !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO))
+           !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO))
                setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT);
 
        if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION))
@@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
                setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
 
        if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) &&
-           !(ia32_cap & ARCH_CAP_SSB_NO) &&
+           !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) &&
           !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
                setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
 
@@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
         * Don't use AutoIBRS when SNP is enabled because it degrades host
         * userspace indirect branch performance.
         */
-       if ((ia32_cap & ARCH_CAP_IBRS_ALL) ||
+       if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) ||
            (cpu_has(c, X86_FEATURE_AUTOIBRS) &&
             !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) {
                setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED);
                if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) &&
-                   !(ia32_cap & ARCH_CAP_PBRSB_NO))
+                   !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO))
                        setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB);
        }
 
        if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) &&
-           !(ia32_cap & ARCH_CAP_MDS_NO)) {
+           !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) {
                setup_force_cpu_bug(X86_BUG_MDS);
                if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY))
                        setup_force_cpu_bug(X86_BUG_MSBDS_ONLY);
@@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
         * TSX_CTRL check alone is not sufficient for cases when the microcode
         * update is not present or running as guest that don't get TSX_CTRL.
         */
-       if (!(ia32_cap & ARCH_CAP_TAA_NO) &&
+       if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) &&
            (cpu_has(c, X86_FEATURE_RTM) ||
-            (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)))
+            (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)))
                setup_force_cpu_bug(X86_BUG_TAA);
 
        /*
@@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
         * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
         * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
         */
-       if (!arch_cap_mmio_immune(ia32_cap)) {
+       if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
                if (cpu_matches(cpu_vuln_blacklist, MMIO))
                        setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
                else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
@@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
        }
 
        if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
-               if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+               if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA))
                        setup_force_cpu_bug(X86_BUG_RETBLEED);
        }
 
@@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
         * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
         * which means that AVX will be disabled.
         */
-       if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+       if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) &&
            boot_cpu_has(X86_FEATURE_AVX))
                setup_force_cpu_bug(X86_BUG_GDS);
 
-       if (vulnerable_to_rfds(ia32_cap))
+       if (vulnerable_to_rfds(x86_arch_cap_msr))
                setup_force_cpu_bug(X86_BUG_RFDS);
 
+       /* When virtualized, eIBRS could be hidden, assume vulnerable */
+       if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) &&
+           !cpu_matches(cpu_vuln_whitelist, NO_BHI) &&
+           (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) ||
+            boot_cpu_has(X86_FEATURE_HYPERVISOR)))
+               setup_force_cpu_bug(X86_BUG_BHI);
+
        if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
                return;
 
        /* Rogue Data Cache Load? No! */
-       if (ia32_cap & ARCH_CAP_RDCL_NO)
+       if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO)
                return;
 
        setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
index a515328d9d7d88b802f588bf678d098e0ba53b86..af5aa2c754c22226080870967d6c410067c86447 100644 (file)
@@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = {
        { X86_FEATURE_EPB,              CPUID_ECX,  3, 0x00000006, 0 },
        { X86_FEATURE_INTEL_PPIN,       CPUID_EBX,  0, 0x00000007, 1 },
        { X86_FEATURE_RRSBA_CTRL,       CPUID_EDX,  2, 0x00000007, 2 },
+       { X86_FEATURE_BHI_CTRL,         CPUID_EDX,  4, 0x00000007, 2 },
        { X86_FEATURE_CQM_LLC,          CPUID_EDX,  1, 0x0000000f, 0 },
        { X86_FEATURE_CQM_OCCUP_LLC,    CPUID_EDX,  0, 0x0000000f, 1 },
        { X86_FEATURE_CQM_MBM_TOTAL,    CPUID_EDX,  1, 0x0000000f, 1 },
index aaca8d235dc2bbee08ab6de2bdb91b231963a7f6..d17c9b71eb4a253eac42acee5a49f3811c83aaff 100644 (file)
@@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id)
        early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id;
        early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id;
 #endif
-       set_cpu_possible(cpu, true);
        set_cpu_present(cpu, true);
 }
 
@@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present)
                topo_info.nr_disabled_cpus++;
        }
 
-       /* Register present and possible CPUs in the domain maps */
+       /*
+        * Register present and possible CPUs in the domain
+        * maps. cpu_possible_map will be updated in
+        * topology_init_possible_cpus() after enumeration is done.
+        */
        for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++)
                set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map);
 }
index 1a8b3ad493afef8eeeea65fe5dba8673517f1240..a7aa6eff4ae5ba26206208479f7530721eebda2d 100644 (file)
@@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan)
        if (!sft)
                sft = get_count_order(ecx.cpu_nthreads + 1);
 
-       topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1);
+       /*
+        * cpu_nthreads describes the number of threads in the package
+        * sft is the number of APIC ID bits per package
+        *
+        * As the number of actual threads per core is not described in
+        * this leaf, just set the CORE domain shift and let the later
+        * parsers set SMT shift. Assume one thread per core by default
+        * which is correct if there are no other CPUID leafs to parse.
+        */
+       topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1);
+       topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1);
        return true;
 }
 
-static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id)
+static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id)
 {
        /*
         * Starting with Fam 17h the DIE domain could probably be used to
@@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
        tscan->c->topo.initial_apicid = leaf.ext_apic_id;
 
        /*
-        * If leaf 0xb is available, then SMT shift is set already. If not
-        * take it from ecx.threads_per_core and use topo_update_dom() -
-        * topology_set_dom() would propagate and overwrite the already
-        * propagated CORE level.
+        * If leaf 0xb is available, then the domain shifts are set
+        * already and nothing to do here.
         */
        if (!has_0xb) {
+               /*
+                * Leaf 0x80000008 set the CORE domain shift already.
+                * Update the SMT domain, but do not propagate it.
+                */
                unsigned int nthreads = leaf.core_nthreads + 1;
 
                topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads);
@@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb)
 
 static bool parse_fam10h_node_id(struct topo_scan *tscan)
 {
-       struct {
-               union {
+       union {
+               struct {
                        u64     node_id         :  3,
                                nodes_per_pkg   :  3,
                                unused          : 58;
-                       u64     msr;
                };
+               u64             msr;
        } nid;
 
        if (!boot_cpu_has(X86_FEATURE_NODEID_MSR))
@@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan)
        tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN];
 }
 
+/*
+ * Try to re-enable TopologyExtensions if switched off by BIOS.
+ *
+ * Only acts on AMD family 0x15, models 0x10 - 0x6f, that do not already
+ * report X86_FEATURE_TOPOEXT. msr_set_bit() is asked to set bit 54 of MSR
+ * 0xc0011005; the MSR is then read back and the feature flag is set only
+ * when that bit is observed set.
+ */
+static void topoext_fixup(struct topo_scan *tscan)
+{
+       struct cpuinfo_x86 *cpu = tscan->c;
+       u64 val;
+
+       /* Nothing to fix when the feature is already visible */
+       if (cpu_has(cpu, X86_FEATURE_TOPOEXT))
+               return;
+
+       /* The fixup is limited to AMD family 0x15, models 0x10 - 0x6f */
+       if (cpu->x86_vendor != X86_VENDOR_AMD || cpu->x86 != 0x15)
+               return;
+       if (cpu->x86_model < 0x10 || cpu->x86_model > 0x6f)
+               return;
+
+       /* Continue only when msr_set_bit() returns a positive value */
+       if (msr_set_bit(0xc0011005, 54) <= 0)
+               return;
+
+       rdmsrl(0xc0011005, val);
+       if (!(val & BIT_64(54)))
+               return;
+
+       set_cpu_cap(cpu, X86_FEATURE_TOPOEXT);
+       pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n");
+}
+
 static void parse_topology_amd(struct topo_scan *tscan)
 {
        bool has_0xb = false;
@@ -164,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan)
 void cpu_parse_topology_amd(struct topo_scan *tscan)
 {
        tscan->amd_nodes_per_pkg = 1;
+       topoext_fixup(tscan);
        parse_topology_amd(tscan);
 
        if (tscan->amd_nodes_per_pkg > 1)
index 58ac8d69c94bd124001861a1b2e06de8f3fd41b8..2f4e155080badc5efdbcc93fbc909c5bbcf70094 100644 (file)
@@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs {
 #define X86_FEATURE_IPRED_CTRL         KVM_X86_FEATURE(CPUID_7_2_EDX, 1)
 #define KVM_X86_FEATURE_RRSBA_CTRL     KVM_X86_FEATURE(CPUID_7_2_EDX, 2)
 #define X86_FEATURE_DDPD_U             KVM_X86_FEATURE(CPUID_7_2_EDX, 3)
-#define X86_FEATURE_BHI_CTRL           KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
+#define KVM_X86_FEATURE_BHI_CTRL       KVM_X86_FEATURE(CPUID_7_2_EDX, 4)
 #define X86_FEATURE_MCDT_NO            KVM_X86_FEATURE(CPUID_7_2_EDX, 5)
 
 /* CPUID level 0x80000007 (EDX). */
@@ -128,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature)
        KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC);
        KVM_X86_TRANSLATE_FEATURE(PERFMON_V2);
        KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL);
+       KVM_X86_TRANSLATE_FEATURE(BHI_CTRL);
        default:
                return x86_feature;
        }
index 2bfbf758d06110f49c71a22c1f54da9d9499669a..f6986dee6f8c7c52622857f131adf766d1528121 100644 (file)
@@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
 
        call vmx_spec_ctrl_restore_host
 
+       CLEAR_BRANCH_HISTORY_VMEXIT
+
        /* Put return value in AX */
        mov %_ASM_BX, %_ASM_AX
 
index 47d9f03b7778373393b9853fe32b153dadd9de29..984ea2089efc3132154527508976879a4e11cb10 100644 (file)
@@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
         ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
         ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
         ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \
-        ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR)
+        ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO)
 
 static u64 kvm_get_arch_capabilities(void)
 {
index bdbb557feb5a0ec949e7ac8cde0e87b6d4055f5b..059467086b13123b26630c1e84942980f3001216 100644 (file)
@@ -1409,6 +1409,12 @@ static int blkcg_css_online(struct cgroup_subsys_state *css)
        return 0;
 }
 
+/* Initialize the blkcg members of @q: the blkg_list head and blkcg_mutex. */
+void blkg_init_queue(struct request_queue *q)
+{
+       INIT_LIST_HEAD(&q->blkg_list);
+       mutex_init(&q->blkcg_mutex);
+}
+
 int blkcg_init_disk(struct gendisk *disk)
 {
        struct request_queue *q = disk->queue;
@@ -1416,9 +1422,6 @@ int blkcg_init_disk(struct gendisk *disk)
        bool preloaded;
        int ret;
 
-       INIT_LIST_HEAD(&q->blkg_list);
-       mutex_init(&q->blkcg_mutex);
-
        new_blkg = blkg_alloc(&blkcg_root, disk, GFP_KERNEL);
        if (!new_blkg)
                return -ENOMEM;
index 78b74106bf10c5cbadd655e2da6b2f21416c0622..90b3959d88cfa4a13026b7262001dd1cb030dcf5 100644 (file)
@@ -189,6 +189,7 @@ struct blkcg_policy {
 extern struct blkcg blkcg_root;
 extern bool blkcg_debug_stats;
 
+void blkg_init_queue(struct request_queue *q);
 int blkcg_init_disk(struct gendisk *disk);
 void blkcg_exit_disk(struct gendisk *disk);
 
@@ -482,6 +483,7 @@ struct blkcg {
 };
 
 static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
+static inline void blkg_init_queue(struct request_queue *q) { }
 static inline int blkcg_init_disk(struct gendisk *disk) { return 0; }
 static inline void blkcg_exit_disk(struct gendisk *disk) { }
 static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
index a16b5abdbbf56f44611d34fd238c0ee3a00d72f5..b795ac177281ad7adec63528d53def2fff1139a5 100644 (file)
@@ -442,6 +442,8 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
        init_waitqueue_head(&q->mq_freeze_wq);
        mutex_init(&q->mq_freeze_lock);
 
+       blkg_init_queue(q);
+
        /*
         * Init percpu_ref in atomic mode so that it's faster to shutdown.
         * See blk_register_queue() for details.
@@ -1195,6 +1197,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
        if (unlikely(!rq_list_empty(plug->cached_rq)))
                blk_mq_free_plug_rqs(plug);
 
+       plug->cur_ktime = 0;
        current->flags &= ~PF_BLOCK_TS;
 }
 
index 9a85bfbbc45a018e941cd0b778ab612a54cdea09..baa20c85799d54a86df05aa412c2e38849a800b4 100644 (file)
@@ -1347,7 +1347,7 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
 {
        struct ioc *ioc = iocg->ioc;
        struct blkcg_gq *blkg = iocg_to_blkg(iocg);
-       u64 tdelta, delay, new_delay;
+       u64 tdelta, delay, new_delay, shift;
        s64 vover, vover_pct;
        u32 hwa;
 
@@ -1362,8 +1362,9 @@ static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
 
        /* calculate the current delay in effect - 1/2 every second */
        tdelta = now->now - iocg->delay_at;
-       if (iocg->delay)
-               delay = iocg->delay >> div64_u64(tdelta, USEC_PER_SEC);
+       shift = div64_u64(tdelta, USEC_PER_SEC);
+       if (iocg->delay && shift < BITS_PER_LONG)
+               delay = iocg->delay >> shift;
        else
                delay = 0;
 
index cdbaef159c4bc3e2f713ac8541a36450271678e7..d2731843f2fccb481eda94e1a1dc980051d2486a 100644 (file)
@@ -182,17 +182,13 @@ static int blk_validate_limits(struct queue_limits *lim)
                return -EINVAL;
 
        /*
-        * Devices that require a virtual boundary do not support scatter/gather
-        * I/O natively, but instead require a descriptor list entry for each
-        * page (which might not be identical to the Linux PAGE_SIZE).  Because
-        * of that they are not limited by our notion of "segment size".
+        * Stacking device may have both virtual boundary and max segment
+        * size limit, so allow this setting now, and long-term the two
+        * might need to move out of stacking limits since we have immutable
+        * bvec and lower layer bio splitting is supposed to handle the two
+        * correctly.
         */
-       if (lim->virt_boundary_mask) {
-               if (WARN_ON_ONCE(lim->max_segment_size &&
-                                lim->max_segment_size != UINT_MAX))
-                       return -EINVAL;
-               lim->max_segment_size = UINT_MAX;
-       } else {
+       if (!lim->virt_boundary_mask) {
                /*
                 * The maximum segment size has an odd historic 64k default that
                 * drivers probably should override.  Just like the I/O size we
index 39f6d1b98fd6a50d5d9df2defe305a23b36f9bcf..51d3f1a55d024cf5600ebd833bdf8ef5ee0627c1 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #include <linux/firmware.h>
@@ -131,22 +131,6 @@ static int ivpu_get_capabilities(struct ivpu_device *vdev, struct drm_ivpu_param
        return 0;
 }
 
-static int ivpu_get_core_clock_rate(struct ivpu_device *vdev, u64 *clk_rate)
-{
-       int ret;
-
-       ret = ivpu_rpm_get_if_active(vdev);
-       if (ret < 0)
-               return ret;
-
-       *clk_rate = ret ? ivpu_hw_reg_pll_freq_get(vdev) : 0;
-
-       if (ret)
-               ivpu_rpm_put(vdev);
-
-       return 0;
-}
-
 static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
 {
        struct ivpu_file_priv *file_priv = file->driver_priv;
@@ -170,7 +154,7 @@ static int ivpu_get_param_ioctl(struct drm_device *dev, void *data, struct drm_f
                args->value = vdev->platform;
                break;
        case DRM_IVPU_PARAM_CORE_CLOCK_RATE:
-               ret = ivpu_get_core_clock_rate(vdev, &args->value);
+               args->value = ivpu_hw_ratio_to_freq(vdev, vdev->hw->pll.max_ratio);
                break;
        case DRM_IVPU_PARAM_NUM_CONTEXTS:
                args->value = ivpu_get_context_count(vdev);
@@ -387,12 +371,15 @@ int ivpu_shutdown(struct ivpu_device *vdev)
 {
        int ret;
 
-       ivpu_prepare_for_reset(vdev);
+       /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
+       pci_save_state(to_pci_dev(vdev->drm.dev));
 
        ret = ivpu_hw_power_down(vdev);
        if (ret)
                ivpu_warn(vdev, "Failed to power down HW: %d\n", ret);
 
+       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+
        return ret;
 }
 
@@ -530,7 +517,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
        vdev->context_xa_limit.min = IVPU_USER_CONTEXT_MIN_SSID;
        vdev->context_xa_limit.max = IVPU_USER_CONTEXT_MAX_SSID;
        atomic64_set(&vdev->unique_id_counter, 0);
-       xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
+       xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ);
        xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
        xa_init_flags(&vdev->db_xa, XA_FLAGS_ALLOC1);
        lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
@@ -560,11 +547,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
        /* Power up early so the rest of init code can access VPU registers */
        ret = ivpu_hw_power_up(vdev);
        if (ret)
-               goto err_power_down;
+               goto err_shutdown;
 
        ret = ivpu_mmu_global_context_init(vdev);
        if (ret)
-               goto err_power_down;
+               goto err_shutdown;
 
        ret = ivpu_mmu_init(vdev);
        if (ret)
@@ -601,10 +588,8 @@ err_mmu_rctx_fini:
        ivpu_mmu_reserved_context_fini(vdev);
 err_mmu_gctx_fini:
        ivpu_mmu_global_context_fini(vdev);
-err_power_down:
-       ivpu_hw_power_down(vdev);
-       if (IVPU_WA(d3hot_after_power_off))
-               pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+err_shutdown:
+       ivpu_shutdown(vdev);
 err_xa_destroy:
        xa_destroy(&vdev->db_xa);
        xa_destroy(&vdev->submitted_jobs_xa);
@@ -628,9 +613,8 @@ static void ivpu_bo_unbind_all_user_contexts(struct ivpu_device *vdev)
 static void ivpu_dev_fini(struct ivpu_device *vdev)
 {
        ivpu_pm_disable(vdev);
+       ivpu_prepare_for_reset(vdev);
        ivpu_shutdown(vdev);
-       if (IVPU_WA(d3hot_after_power_off))
-               pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
 
        ivpu_jobs_abort_all(vdev);
        ivpu_job_done_consumer_fini(vdev);
index 7be0500d9bb8919574b02066b8389c56c6c83f05..bb4374d0eaecc9a25d2f6b28056aa5d8d762bd15 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #ifndef __IVPU_DRV_H__
@@ -90,7 +90,6 @@
 struct ivpu_wa_table {
        bool punit_disabled;
        bool clear_runtime_mem;
-       bool d3hot_after_power_off;
        bool interrupt_clear_with_0;
        bool disable_clock_relinquish;
        bool disable_d0i3_msg;
index b2909168a0a6902b4fb061910796ac19d5caf6e1..094c659d2800b127bf1c616e34973673c1f55061 100644 (file)
@@ -21,6 +21,7 @@ struct ivpu_hw_ops {
        u32 (*profiling_freq_get)(struct ivpu_device *vdev);
        void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
        u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
+       u32 (*ratio_to_freq)(struct ivpu_device *vdev, u32 ratio);
        u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
        u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
        u32 (*reg_telemetry_enable_get)(struct ivpu_device *vdev);
@@ -130,6 +131,11 @@ static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
        return vdev->hw->ops->reg_pll_freq_get(vdev);
 };
 
+static inline u32 ivpu_hw_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+       return vdev->hw->ops->ratio_to_freq(vdev, ratio);
+}
+
 static inline u32 ivpu_hw_reg_telemetry_offset_get(struct ivpu_device *vdev)
 {
        return vdev->hw->ops->reg_telemetry_offset_get(vdev);
index 9a0c9498baba293cece13e9584f21f7b2067c681..bd25e2d9fb0f45a35d9ef9ca7ca16f14aa151521 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #include "ivpu_drv.h"
@@ -75,7 +75,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
 {
        vdev->wa.punit_disabled = false;
        vdev->wa.clear_runtime_mem = false;
-       vdev->wa.d3hot_after_power_off = true;
 
        REGB_WR32(VPU_37XX_BUTTRESS_INTERRUPT_STAT, BUTTRESS_ALL_IRQ_MASK);
        if (REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) == BUTTRESS_ALL_IRQ_MASK) {
@@ -86,7 +85,6 @@ static void ivpu_hw_wa_init(struct ivpu_device *vdev)
 
        IVPU_PRINT_WA(punit_disabled);
        IVPU_PRINT_WA(clear_runtime_mem);
-       IVPU_PRINT_WA(d3hot_after_power_off);
        IVPU_PRINT_WA(interrupt_clear_with_0);
 }
 
@@ -805,12 +803,12 @@ static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool ena
        /* Profiling freq - is a debug feature. Unavailable on VPU 37XX. */
 }
 
-static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
+static u32 ivpu_hw_37xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
 {
        u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
        u32 cpu_clock;
 
-       if ((config & 0xff) == PLL_RATIO_4_3)
+       if ((vdev->hw->config & 0xff) == PLL_RATIO_4_3)
                cpu_clock = pll_clock * 2 / 4;
        else
                cpu_clock = pll_clock * 2 / 5;
@@ -829,7 +827,7 @@ static u32 ivpu_hw_37xx_reg_pll_freq_get(struct ivpu_device *vdev)
        if (!ivpu_is_silicon(vdev))
                return PLL_SIMULATION_FREQ;
 
-       return ivpu_hw_37xx_pll_to_freq(pll_curr_ratio, vdev->hw->config);
+       return ivpu_hw_37xx_ratio_to_freq(vdev, pll_curr_ratio);
 }
 
 static u32 ivpu_hw_37xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
@@ -1052,6 +1050,7 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
        .profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
        .profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
        .reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
+       .ratio_to_freq = ivpu_hw_37xx_ratio_to_freq,
        .reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
        .reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
        .reg_telemetry_enable_get = ivpu_hw_37xx_reg_telemetry_enable_get,
index e4eddbf5d11c250bb8ddd2a27843242166896217..b0b88d4c89264a0a95f18edc9b140d720c89279d 100644 (file)
@@ -980,6 +980,11 @@ static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
        return PLL_RATIO_TO_FREQ(pll_curr_ratio);
 }
 
+static u32 ivpu_hw_40xx_ratio_to_freq(struct ivpu_device *vdev, u32 ratio)
+{
+       return PLL_RATIO_TO_FREQ(ratio);
+}
+
 static u32 ivpu_hw_40xx_reg_telemetry_offset_get(struct ivpu_device *vdev)
 {
        return REGB_RD32(VPU_40XX_BUTTRESS_VPU_TELEMETRY_OFFSET);
@@ -1230,6 +1235,7 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
        .profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
        .profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
        .reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
+       .ratio_to_freq = ivpu_hw_40xx_ratio_to_freq,
        .reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
        .reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
        .reg_telemetry_enable_get = ivpu_hw_40xx_reg_telemetry_enable_get,
index 04ac4b9840fbe56341e1552c2783715a83b58e7c..56ff067f63e29559d2e0605645c97bb1a0391142 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #include <linux/genalloc.h>
@@ -501,7 +501,11 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
        spin_lock_init(&ipc->cons_lock);
        INIT_LIST_HEAD(&ipc->cons_list);
        INIT_LIST_HEAD(&ipc->cb_msg_list);
-       drmm_mutex_init(&vdev->drm, &ipc->lock);
+       ret = drmm_mutex_init(&vdev->drm, &ipc->lock);
+       if (ret) {
+               ivpu_err(vdev, "Failed to initialize ipc->lock, ret %d\n", ret);
+               goto err_free_rx;
+       }
        ivpu_ipc_reset(vdev);
        return 0;
 
index 91bd640655ab363b51df17a25cb9589293adc804..2e46b322c4505ea5f18997d0ef969f43239f72c8 100644 (file)
@@ -278,7 +278,7 @@ static const char *ivpu_mmu_event_to_str(u32 cmd)
        case IVPU_MMU_EVT_F_VMS_FETCH:
                return "Fetch of VMS caused external abort";
        default:
-               return "Unknown CMDQ command";
+               return "Unknown event";
        }
 }
 
@@ -286,15 +286,15 @@ static const char *ivpu_mmu_cmdq_err_to_str(u32 err)
 {
        switch (err) {
        case IVPU_MMU_CERROR_NONE:
-               return "No CMDQ Error";
+               return "No error";
        case IVPU_MMU_CERROR_ILL:
                return "Illegal command";
        case IVPU_MMU_CERROR_ABT:
-               return "External abort on CMDQ read";
+               return "External abort on command queue read";
        case IVPU_MMU_CERROR_ATC_INV_SYNC:
                return "Sync failed to complete ATS invalidation";
        default:
-               return "Unknown CMDQ Error";
+               return "Unknown error";
        }
 }
 
index 7cce1c928a7f4e8386344fd81d58e7893f72c050..4f5ea466731ffe6b5b2ea178ae907274f26f5b62 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2020-2023 Intel Corporation
+ * Copyright (C) 2020-2024 Intel Corporation
  */
 
 #include <linux/highmem.h>
@@ -58,14 +58,11 @@ static int ivpu_suspend(struct ivpu_device *vdev)
 {
        int ret;
 
-       /* Save PCI state before powering down as it sometimes gets corrupted if NPU hangs */
-       pci_save_state(to_pci_dev(vdev->drm.dev));
+       ivpu_prepare_for_reset(vdev);
 
        ret = ivpu_shutdown(vdev);
        if (ret)
-               ivpu_err(vdev, "Failed to shutdown VPU: %d\n", ret);
-
-       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
+               ivpu_err(vdev, "Failed to shutdown NPU: %d\n", ret);
 
        return ret;
 }
@@ -74,10 +71,10 @@ static int ivpu_resume(struct ivpu_device *vdev)
 {
        int ret;
 
-       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
+retry:
        pci_restore_state(to_pci_dev(vdev->drm.dev));
+       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D0);
 
-retry:
        ret = ivpu_hw_power_up(vdev);
        if (ret) {
                ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
@@ -100,6 +97,7 @@ err_mmu_disable:
        ivpu_mmu_disable(vdev);
 err_power_down:
        ivpu_hw_power_down(vdev);
+       pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
 
        if (!ivpu_fw_is_cold_boot(vdev)) {
                ivpu_pm_prepare_cold_boot(vdev);
index 7c157bf926956be5cabd6db7c708ff87759c7879..d1464324de9519cdb96e026f3733170788bb786d 100644 (file)
@@ -1843,7 +1843,8 @@ static void acpi_scan_dep_init(struct acpi_device *adev)
                        if (dep->honor_dep)
                                adev->flags.honor_deps = 1;
 
-                       adev->dep_unmet++;
+                       if (!dep->met)
+                               adev->dep_unmet++;
                }
        }
 }
index 562302e2e57ce5a2651575ad1620b1725d654f6a..6548f10e61d9c72ca89180e011f8e495058302a1 100644 (file)
@@ -666,6 +666,87 @@ static int mobile_lpm_policy = -1;
 module_param(mobile_lpm_policy, int, 0644);
 MODULE_PARM_DESC(mobile_lpm_policy, "Default LPM policy for mobile chipsets");
 
+static char *ahci_mask_port_map;
+module_param_named(mask_port_map, ahci_mask_port_map, charp, 0444);
+MODULE_PARM_DESC(mask_port_map,
+                "32-bits port map masks to ignore controllers ports. "
+                "Valid values are: "
+                "\"<mask>\" to apply the same mask to all AHCI controller "
+                "devices, and \"<pci_dev>=<mask>,<pci_dev>=<mask>,...\" to "
+                "specify different masks for the controllers specified, "
+                "where <pci_dev> is the PCI ID of an AHCI controller in the "
+                "form \"domain:bus:dev.func\"");
+
+static void ahci_apply_port_map_mask(struct device *dev,
+                                    struct ahci_host_priv *hpriv, char *mask_s)
+{
+       unsigned int mask;
+
+       if (kstrtouint(mask_s, 0, &mask)) {
+               dev_err(dev, "Invalid port map mask\n");
+               return;
+       }
+
+       hpriv->mask_port_map = mask;
+}
+
+static void ahci_get_port_map_mask(struct device *dev,
+                                  struct ahci_host_priv *hpriv)
+{
+       char *param, *end, *str, *mask_s;
+       char *name;
+
+       if (!strlen(ahci_mask_port_map))
+               return;
+
+       str = kstrdup(ahci_mask_port_map, GFP_KERNEL);
+       if (!str)
+               return;
+
+       /* Handle single mask case */
+       if (!strchr(str, '=')) {
+               ahci_apply_port_map_mask(dev, hpriv, str);
+               goto free;
+       }
+
+       /*
+        * Mask list case: parse the parameter to apply the mask only if
+        * the device name matches.
+        */
+       param = str;
+       end = param + strlen(param);
+       while (param && param < end && *param) {
+               name = param;
+               param = strchr(name, '=');
+               if (!param)
+                       break;
+
+               *param = '\0';
+               param++;
+               if (param >= end)
+                       break;
+
+               if (strcmp(dev_name(dev), name) != 0) {
+                       param = strchr(param, ',');
+                       if (param)
+                               param++;
+                       continue;
+               }
+
+               mask_s = param;
+               param = strchr(mask_s, ',');
+               if (param) {
+                       *param = '\0';
+                       param++;
+               }
+
+               ahci_apply_port_map_mask(dev, hpriv, mask_s);
+       }
+
+free:
+       kfree(str);
+}
+
 static void ahci_pci_save_initial_config(struct pci_dev *pdev,
                                         struct ahci_host_priv *hpriv)
 {
@@ -688,6 +769,10 @@ static void ahci_pci_save_initial_config(struct pci_dev *pdev,
                          "Disabling your PATA port. Use the boot option 'ahci.marvell_enable=0' to avoid this.\n");
        }
 
+       /* Handle port map masks passed as module parameter. */
+       if (ahci_mask_port_map)
+               ahci_get_port_map_mask(&pdev->dev, hpriv);
+
        ahci_save_initial_config(&pdev->dev, hpriv);
 }
 
index be3412cdb22e78a1d663337698f07b07c66727e4..c449d60d9bb962c80ac7e196d08dd722d2c6950b 100644 (file)
@@ -2539,7 +2539,7 @@ static void ata_dev_config_cdl(struct ata_device *dev)
        bool cdl_enabled;
        u64 val;
 
-       if (ata_id_major_version(dev->id) < 12)
+       if (ata_id_major_version(dev->id) < 11)
                goto not_supported;
 
        if (!ata_log_supported(dev, ATA_LOG_IDENTIFY_DEVICE) ||
index 2f4c58837641077f3ad91974cd11affbe6dcd1e8..e954976891a9f502930a3a7ffc5f31df113d2326 100644 (file)
@@ -4745,7 +4745,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
                         * bail out.
                         */
                        if (ap->pflags & ATA_PFLAG_SUSPENDED)
-                               goto unlock;
+                               goto unlock_ap;
 
                        if (!sdev)
                                continue;
@@ -4758,7 +4758,7 @@ void ata_scsi_dev_rescan(struct work_struct *work)
                        if (do_resume) {
                                ret = scsi_resume_device(sdev);
                                if (ret == -EWOULDBLOCK)
-                                       goto unlock;
+                                       goto unlock_scan;
                                dev->flags &= ~ATA_DFLAG_RESUMING;
                        }
                        ret = scsi_rescan_device(sdev);
@@ -4766,12 +4766,13 @@ void ata_scsi_dev_rescan(struct work_struct *work)
                        spin_lock_irqsave(ap->lock, flags);
 
                        if (ret)
-                               goto unlock;
+                               goto unlock_ap;
                }
        }
 
-unlock:
+unlock_ap:
        spin_unlock_irqrestore(ap->lock, flags);
+unlock_scan:
        mutex_unlock(&ap->scsi_scan_mutex);
 
        /* Reschedule with a delay if scsi_rescan_device() returned an error */
index 89ed6cd6b059ebb0af77dcc0d2b83a72fe995dc4..e9cc8b4786fbfb9eba5d3c1d8c06c3d08477a132 100644 (file)
@@ -15,6 +15,8 @@
 #include <linux/of_address.h>
 #include <linux/device.h>
 #include <linux/bitfield.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
 #include <asm/cacheflush.h>
 #include <asm/cacheinfo.h>
 #include <asm/dma-noncoherent.h>
@@ -247,13 +249,49 @@ static irqreturn_t ccache_int_handler(int irq, void *device)
        return IRQ_HANDLED;
 }
 
+static int sifive_ccache_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       unsigned long quirks;
+       int intr_num, rc;
+
+       quirks = (unsigned long)device_get_match_data(dev);
+
+       intr_num = platform_irq_count(pdev);
+       if (!intr_num)
+               return dev_err_probe(dev, -ENODEV, "No interrupts property\n");
+
+       for (int i = 0; i < intr_num; i++) {
+               if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
+                       continue;
+
+               g_irq[i] = platform_get_irq(pdev, i);
+               if (g_irq[i] < 0)
+                       return g_irq[i];
+
+               rc = devm_request_irq(dev, g_irq[i], ccache_int_handler, 0, "ccache_ecc", NULL);
+               if (rc)
+                       return dev_err_probe(dev, rc, "Could not request IRQ %d\n", g_irq[i]);
+       }
+
+       return 0;
+}
+
+static struct platform_driver sifive_ccache_driver = {
+       .probe  = sifive_ccache_probe,
+       .driver = {
+               .name           = "sifive_ccache",
+               .of_match_table = sifive_ccache_ids,
+       },
+};
+
 static int __init sifive_ccache_init(void)
 {
        struct device_node *np;
        struct resource res;
-       int i, rc, intr_num;
        const struct of_device_id *match;
        unsigned long quirks;
+       int rc;
 
        np = of_find_matching_node_and_match(NULL, sifive_ccache_ids, &match);
        if (!np)
@@ -277,28 +315,6 @@ static int __init sifive_ccache_init(void)
                goto err_unmap;
        }
 
-       intr_num = of_property_count_u32_elems(np, "interrupts");
-       if (!intr_num) {
-               pr_err("No interrupts property\n");
-               rc = -ENODEV;
-               goto err_unmap;
-       }
-
-       for (i = 0; i < intr_num; i++) {
-               g_irq[i] = irq_of_parse_and_map(np, i);
-
-               if (i == DATA_UNCORR && (quirks & QUIRK_BROKEN_DATA_UNCORR))
-                       continue;
-
-               rc = request_irq(g_irq[i], ccache_int_handler, 0, "ccache_ecc",
-                                NULL);
-               if (rc) {
-                       pr_err("Could not request IRQ %d\n", g_irq[i]);
-                       goto err_free_irq;
-               }
-       }
-       of_node_put(np);
-
 #ifdef CONFIG_RISCV_NONSTANDARD_CACHE_OPS
        if (quirks & QUIRK_NONSTANDARD_CACHE_OPS) {
                riscv_cbom_block_size = SIFIVE_CCACHE_LINE_SIZE;
@@ -315,11 +331,15 @@ static int __init sifive_ccache_init(void)
 #ifdef CONFIG_DEBUG_FS
        setup_sifive_debug();
 #endif
+
+       rc = platform_driver_register(&sifive_ccache_driver);
+       if (rc)
+               goto err_unmap;
+
+       of_node_put(np);
+
        return 0;
 
-err_free_irq:
-       while (--i >= 0)
-               free_irq(g_irq[i], NULL);
 err_unmap:
        iounmap(ccache_base);
 err_node_put:
index 456be28ba67cb476846c83c532e7bd04e521463f..2597cb43f43871dc0dc629c13b0b0ee3acf1398a 100644 (file)
@@ -702,7 +702,7 @@ static void extract_entropy(void *buf, size_t len)
 
 static void __cold _credit_init_bits(size_t bits)
 {
-       static struct execute_work set_ready;
+       static DECLARE_WORK(set_ready, crng_set_ready);
        unsigned int new, orig, add;
        unsigned long flags;
 
@@ -718,8 +718,8 @@ static void __cold _credit_init_bits(size_t bits)
 
        if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
                crng_reseed(NULL); /* Sets crng_init to CRNG_READY under base_crng.lock. */
-               if (static_key_initialized)
-                       execute_in_process_context(crng_set_ready, &set_ready);
+               if (static_key_initialized && system_unbound_wq)
+                       queue_work(system_unbound_wq, &set_ready);
                atomic_notifier_call_chain(&random_ready_notifier, 0, NULL);
                wake_up_interruptible(&crng_init_wait);
                kill_fasync(&fasync, SIGIO, POLL_IN);
@@ -890,8 +890,8 @@ void __init random_init(void)
 
        /*
         * If we were initialized by the cpu or bootloader before jump labels
-        * are initialized, then we should enable the static branch here, where
-        * it's guaranteed that jump labels have been initialized.
+        * or workqueues are initialized, then we should enable the static
+        * branch here, where it's guaranteed that these have been initialized.
         */
        if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY)
                crng_set_ready(NULL);
index af5cb818f84d6bf566e6c0a84763d8239d64700f..cb8c155a2c9b3dbdcbf00f198c5783b9559f8a89 100644 (file)
@@ -525,22 +525,11 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport)
 {
        struct acpi_device *hb = to_cxl_host_bridge(NULL, dev);
        u32 uid;
-       int rc;
 
        if (kstrtou32(acpi_device_uid(hb), 0, &uid))
                return -EINVAL;
 
-       rc = acpi_get_genport_coordinates(uid, dport->hb_coord);
-       if (rc < 0)
-               return rc;
-
-       /* Adjust back to picoseconds from nanoseconds */
-       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
-               dport->hb_coord[i].read_latency *= 1000;
-               dport->hb_coord[i].write_latency *= 1000;
-       }
-
-       return 0;
+       return acpi_get_genport_coordinates(uid, dport->coord);
 }
 
 static int add_host_bridge_dport(struct device *match, void *arg)
index eddbbe21450ca9dca5e71bf6ec14866cde0935d3..bb83867d9fec985634bb9b03652f1eaa34fc8a22 100644 (file)
 struct dsmas_entry {
        struct range dpa_range;
        u8 handle;
-       struct access_coordinate coord;
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
 
        int entries;
        int qos_class;
 };
 
+static u32 cdat_normalize(u16 entry, u64 base, u8 type)
+{
+       u32 value;
+
+       /*
+        * Check for invalid and overflow values
+        */
+       if (entry == 0xffff || !entry)
+               return 0;
+       else if (base > (UINT_MAX / (entry)))
+               return 0;
+
+       /*
+        * CDAT fields follow the format of HMAT fields. See table 5 Device
+        * Scoped Latency and Bandwidth Information Structure in Coherent Device
+        * Attribute Table (CDAT) Specification v1.01.
+        */
+       value = entry * base;
+       switch (type) {
+       case ACPI_HMAT_ACCESS_LATENCY:
+       case ACPI_HMAT_READ_LATENCY:
+       case ACPI_HMAT_WRITE_LATENCY:
+               value = DIV_ROUND_UP(value, 1000);
+               break;
+       default:
+               break;
+       }
+       return value;
+}
+
 static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
                              const unsigned long end)
 {
@@ -58,8 +88,8 @@ static int cdat_dsmas_handler(union acpi_subtable_headers *header, void *arg,
        return 0;
 }
 
-static void cxl_access_coordinate_set(struct access_coordinate *coord,
-                                     int access, unsigned int val)
+static void __cxl_access_coordinate_set(struct access_coordinate *coord,
+                                       int access, unsigned int val)
 {
        switch (access) {
        case ACPI_HMAT_ACCESS_LATENCY:
@@ -85,6 +115,13 @@ static void cxl_access_coordinate_set(struct access_coordinate *coord,
        }
 }
 
+static void cxl_access_coordinate_set(struct access_coordinate *coord,
+                                     int access, unsigned int val)
+{
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+               __cxl_access_coordinate_set(&coord[i], access, val);
+}
+
 static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
                               const unsigned long end)
 {
@@ -97,7 +134,6 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
        __le16 le_val;
        u64 val;
        u16 len;
-       int rc;
 
        len = le16_to_cpu((__force __le16)hdr->length);
        if (len != size || (unsigned long)hdr + len > end) {
@@ -124,12 +160,10 @@ static int cdat_dslbis_handler(union acpi_subtable_headers *header, void *arg,
 
        le_base = (__force __le64)dslbis->entry_base_unit;
        le_val = (__force __le16)dslbis->entry[0];
-       rc = check_mul_overflow(le64_to_cpu(le_base),
-                               le16_to_cpu(le_val), &val);
-       if (rc)
-               pr_warn("DSLBIS value overflowed.\n");
+       val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+                            dslbis->data_type);
 
-       cxl_access_coordinate_set(&dent->coord, dslbis->data_type, val);
+       cxl_access_coordinate_set(dent->coord, dslbis->data_type, val);
 
        return 0;
 }
@@ -163,25 +197,18 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port,
 static int cxl_port_perf_data_calculate(struct cxl_port *port,
                                        struct xarray *dsmas_xa)
 {
-       struct access_coordinate ep_c;
-       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
+       struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
        struct dsmas_entry *dent;
        int valid_entries = 0;
        unsigned long index;
        int rc;
 
-       rc = cxl_endpoint_get_perf_coordinates(port, &ep_c);
+       rc = cxl_endpoint_get_perf_coordinates(port, ep_c);
        if (rc) {
                dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n");
                return rc;
        }
 
-       rc = cxl_hb_get_perf_coordinates(port, coord);
-       if (rc)  {
-               dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
-               return rc;
-       }
-
        struct cxl_root *cxl_root __free(put_cxl_root) = find_cxl_root(port);
 
        if (!cxl_root)
@@ -193,18 +220,10 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
        xa_for_each(dsmas_xa, index, dent) {
                int qos_class;
 
-               cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c);
-               /*
-                * Keeping the host bridge coordinates separate from the dsmas
-                * coordinates in order to allow calculation of access class
-                * 0 and 1 for region later.
-                */
-               cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU],
-                                       &coord[ACCESS_COORDINATE_CPU],
-                                       &dent->coord);
+               cxl_coordinates_combine(dent->coord, dent->coord, ep_c);
                dent->entries = 1;
                rc = cxl_root->ops->qos_class(cxl_root,
-                                             &coord[ACCESS_COORDINATE_CPU],
+                                             &dent->coord[ACCESS_COORDINATE_CPU],
                                              1, &qos_class);
                if (rc != 1)
                        continue;
@@ -222,14 +241,17 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port,
 static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
                              struct cxl_dpa_perf *dpa_perf)
 {
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+               dpa_perf->coord[i] = dent->coord[i];
        dpa_perf->dpa_range = dent->dpa_range;
-       dpa_perf->coord = dent->coord;
        dpa_perf->qos_class = dent->qos_class;
        dev_dbg(dev,
                "DSMAS: dpa: %#llx qos: %d read_bw: %d write_bw %d read_lat: %d write_lat: %d\n",
                dent->dpa_range.start, dpa_perf->qos_class,
-               dent->coord.read_bandwidth, dent->coord.write_bandwidth,
-               dent->coord.read_latency, dent->coord.write_latency);
+               dent->coord[ACCESS_COORDINATE_CPU].read_bandwidth,
+               dent->coord[ACCESS_COORDINATE_CPU].write_bandwidth,
+               dent->coord[ACCESS_COORDINATE_CPU].read_latency,
+               dent->coord[ACCESS_COORDINATE_CPU].write_latency);
 }
 
 static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
@@ -461,17 +483,16 @@ static int cdat_sslbis_handler(union acpi_subtable_headers *header, void *arg,
 
                le_base = (__force __le64)tbl->sslbis_header.entry_base_unit;
                le_val = (__force __le16)tbl->entries[i].latency_or_bandwidth;
-
-               if (check_mul_overflow(le64_to_cpu(le_base),
-                                      le16_to_cpu(le_val), &val))
-                       dev_warn(dev, "SSLBIS value overflowed!\n");
+               val = cdat_normalize(le16_to_cpu(le_val), le64_to_cpu(le_base),
+                                    sslbis->data_type);
 
                xa_for_each(&port->dports, index, dport) {
                        if (dsp_id == ACPI_CDAT_SSLBIS_ANY_PORT ||
-                           dsp_id == dport->port_id)
-                               cxl_access_coordinate_set(&dport->sw_coord,
+                           dsp_id == dport->port_id) {
+                               cxl_access_coordinate_set(dport->coord,
                                                          sslbis->data_type,
                                                          val);
+                       }
                }
        }
 
@@ -493,6 +514,21 @@ void cxl_switch_parse_cdat(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL);
 
+static void __cxl_coordinates_combine(struct access_coordinate *out,
+                                     struct access_coordinate *c1,
+                                     struct access_coordinate *c2)
+{
+               if (c1->write_bandwidth && c2->write_bandwidth)
+                       out->write_bandwidth = min(c1->write_bandwidth,
+                                                  c2->write_bandwidth);
+               out->write_latency = c1->write_latency + c2->write_latency;
+
+               if (c1->read_bandwidth && c2->read_bandwidth)
+                       out->read_bandwidth = min(c1->read_bandwidth,
+                                                 c2->read_bandwidth);
+               out->read_latency = c1->read_latency + c2->read_latency;
+}
+
 /**
  * cxl_coordinates_combine - Combine the two input coordinates
  *
@@ -504,15 +540,8 @@ void cxl_coordinates_combine(struct access_coordinate *out,
                             struct access_coordinate *c1,
                             struct access_coordinate *c2)
 {
-               if (c1->write_bandwidth && c2->write_bandwidth)
-                       out->write_bandwidth = min(c1->write_bandwidth,
-                                                  c2->write_bandwidth);
-               out->write_latency = c1->write_latency + c2->write_latency;
-
-               if (c1->read_bandwidth && c2->read_bandwidth)
-                       out->read_bandwidth = min(c1->read_bandwidth,
-                                                 c2->read_bandwidth);
-               out->read_latency = c1->read_latency + c2->read_latency;
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+               __cxl_coordinates_combine(&out[i], &c1[i], &c2[i]);
 }
 
 MODULE_IMPORT_NS(CXL);
@@ -521,17 +550,13 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
                                    struct cxl_endpoint_decoder *cxled)
 {
        struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
-       struct cxl_port *port = cxlmd->endpoint;
        struct cxl_dev_state *cxlds = cxlmd->cxlds;
        struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
-       struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
-       struct access_coordinate coord;
        struct range dpa = {
                        .start = cxled->dpa_res->start,
                        .end = cxled->dpa_res->end,
        };
        struct cxl_dpa_perf *perf;
-       int rc;
 
        switch (cxlr->mode) {
        case CXL_DECODER_RAM:
@@ -549,35 +574,16 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
        if (!range_contains(&perf->dpa_range, &dpa))
                return;
 
-       rc = cxl_hb_get_perf_coordinates(port, hb_coord);
-       if (rc)  {
-               dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n");
-               return;
-       }
-
        for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
-               /* Pickup the host bridge coords */
-               cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord);
-
                /* Get total bandwidth and the worst latency for the cxl region */
                cxlr->coord[i].read_latency = max_t(unsigned int,
                                                    cxlr->coord[i].read_latency,
-                                                   coord.read_latency);
+                                                   perf->coord[i].read_latency);
                cxlr->coord[i].write_latency = max_t(unsigned int,
                                                     cxlr->coord[i].write_latency,
-                                                    coord.write_latency);
-               cxlr->coord[i].read_bandwidth += coord.read_bandwidth;
-               cxlr->coord[i].write_bandwidth += coord.write_bandwidth;
-
-               /*
-                * Convert latency to nanosec from picosec to be consistent
-                * with the resulting latency coordinates computed by the
-                * HMAT_REPORTING code.
-                */
-               cxlr->coord[i].read_latency =
-                       DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000);
-               cxlr->coord[i].write_latency =
-                       DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000);
+                                                    perf->coord[i].write_latency);
+               cxlr->coord[i].read_bandwidth += perf->coord[i].read_bandwidth;
+               cxlr->coord[i].write_bandwidth += perf->coord[i].write_bandwidth;
        }
 }
 
index 9adda4795eb786b8658b573dd1e79befbad52255..f0f54aeccc872b50311a14958ddf874860af7982 100644 (file)
@@ -915,7 +915,7 @@ static int cxl_clear_event_record(struct cxl_memdev_state *mds,
 
                payload->handles[i++] = gen->hdr.handle;
                dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
-                       le16_to_cpu(payload->handles[i]));
+                       le16_to_cpu(payload->handles[i - 1]));
 
                if (i == max_handles) {
                        payload->nr_recs = i;
@@ -958,13 +958,14 @@ static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
                .payload_in = &log_type,
                .size_in = sizeof(log_type),
                .payload_out = payload,
-               .size_out = mds->payload_size,
                .min_out = struct_size(payload, records, 0),
        };
 
        do {
                int rc, i;
 
+               mbox_cmd.size_out = mds->payload_size;
+
                rc = cxl_internal_send_cmd(mds, &mbox_cmd);
                if (rc) {
                        dev_err_ratelimited(dev,
index 2b0cab556072f560420f7f7bf4d0bcddd0a01b4a..762783bb091afc8a40883c9ab2ee9c0f39e37219 100644 (file)
@@ -2133,36 +2133,44 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
 }
 EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
 
-/**
- * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator
- *                              and host bridge
- *
- * @port: endpoint cxl_port
- * @coord: output access coordinates
- *
- * Return: errno on failure, 0 on success.
- */
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
-                               struct access_coordinate *coord)
+static void add_latency(struct access_coordinate *c, long latency)
 {
-       struct cxl_port *iter = port;
-       struct cxl_dport *dport;
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               c[i].write_latency += latency;
+               c[i].read_latency += latency;
+       }
+}
 
-       if (!is_cxl_endpoint(port))
-               return -EINVAL;
+static bool coordinates_valid(struct access_coordinate *c)
+{
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               if (c[i].read_bandwidth && c[i].write_bandwidth &&
+                   c[i].read_latency && c[i].write_latency)
+                       continue;
+               return false;
+       }
 
-       dport = iter->parent_dport;
-       while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
-               iter = to_cxl_port(iter->dev.parent);
-               dport = iter->parent_dport;
+       return true;
+}
+
+static void set_min_bandwidth(struct access_coordinate *c, unsigned int bw)
+{
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               c[i].write_bandwidth = min(c[i].write_bandwidth, bw);
+               c[i].read_bandwidth = min(c[i].read_bandwidth, bw);
        }
+}
 
-       coord[ACCESS_COORDINATE_LOCAL] =
-               dport->hb_coord[ACCESS_COORDINATE_LOCAL];
-       coord[ACCESS_COORDINATE_CPU] =
-               dport->hb_coord[ACCESS_COORDINATE_CPU];
+static void set_access_coordinates(struct access_coordinate *out,
+                                  struct access_coordinate *in)
+{
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++)
+               out[i] = in[i];
+}
 
-       return 0;
+static bool parent_port_is_cxl_root(struct cxl_port *port)
+{
+       return is_cxl_root(to_cxl_port(port->dev.parent));
 }
 
 /**
@@ -2176,35 +2184,53 @@ int cxl_hb_get_perf_coordinates(struct cxl_port *port,
 int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                                      struct access_coordinate *coord)
 {
-       struct access_coordinate c = {
-               .read_bandwidth = UINT_MAX,
-               .write_bandwidth = UINT_MAX,
+       struct access_coordinate c[] = {
+               {
+                       .read_bandwidth = UINT_MAX,
+                       .write_bandwidth = UINT_MAX,
+               },
+               {
+                       .read_bandwidth = UINT_MAX,
+                       .write_bandwidth = UINT_MAX,
+               },
        };
        struct cxl_port *iter = port;
        struct cxl_dport *dport;
        struct pci_dev *pdev;
        unsigned int bw;
+       bool is_cxl_root;
 
        if (!is_cxl_endpoint(port))
                return -EINVAL;
 
-       dport = iter->parent_dport;
-
        /*
-        * Exit the loop when the parent port of the current port is cxl root.
-        * The iterative loop starts at the endpoint and gathers the
-        * latency of the CXL link from the current iter to the next downstream
-        * port each iteration. If the parent is cxl root then there is
-        * nothing to gather.
+        * Exit the loop when the parent port of the current iter port is cxl
+        * root. The iterative loop starts at the endpoint and gathers the
+        * latency of the CXL link from the current device/port to the connected
+        * downstream port each iteration.
         */
-       while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) {
-               cxl_coordinates_combine(&c, &c, &dport->sw_coord);
-               c.write_latency += dport->link_latency;
-               c.read_latency += dport->link_latency;
-
-               iter = to_cxl_port(iter->dev.parent);
+       do {
                dport = iter->parent_dport;
-       }
+               iter = to_cxl_port(iter->dev.parent);
+               is_cxl_root = parent_port_is_cxl_root(iter);
+
+               /*
+                * There's no valid access_coordinate for a root port since RPs do not
+                * have CDAT and therefore needs to be skipped.
+                */
+               if (!is_cxl_root) {
+                       if (!coordinates_valid(dport->coord))
+                               return -EINVAL;
+                       cxl_coordinates_combine(c, c, dport->coord);
+               }
+               add_latency(c, dport->link_latency);
+       } while (!is_cxl_root);
+
+       dport = iter->parent_dport;
+       /* Retrieve HB coords */
+       if (!coordinates_valid(dport->coord))
+               return -EINVAL;
+       cxl_coordinates_combine(c, c, dport->coord);
 
        /* Get the calculated PCI paths bandwidth */
        pdev = to_pci_dev(port->uport_dev->parent);
@@ -2213,10 +2239,8 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                return -ENXIO;
        bw /= BITS_PER_BYTE;
 
-       c.write_bandwidth = min(c.write_bandwidth, bw);
-       c.read_bandwidth = min(c.read_bandwidth, bw);
-
-       *coord = c;
+       set_min_bandwidth(c, bw);
+       set_access_coordinates(coord, c);
 
        return 0;
 }
index 372786f809555f66509186c3e3476af2fad0d7f8..3c42f984eeafaa54af79ac280cd24c0df62f944f 100644 (file)
@@ -271,6 +271,7 @@ EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
 static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
                                struct cxl_register_map *map)
 {
+       u8 reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
        int bar = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
        u64 offset = ((u64)reg_hi << 32) |
                     (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
@@ -278,11 +279,11 @@ static bool cxl_decode_regblock(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi,
        if (offset > pci_resource_len(pdev, bar)) {
                dev_warn(&pdev->dev,
                         "BAR%d: %pr: too small (offset: %pa, type: %d)\n", bar,
-                        &pdev->resource[bar], &offset, map->reg_type);
+                        &pdev->resource[bar], &offset, reg_type);
                return false;
        }
 
-       map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+       map->reg_type = reg_type;
        map->resource = pci_resource_start(pdev, bar) + offset;
        map->max_size = pci_resource_len(pdev, bar) - offset;
        return true;
index 534e25e2f0a48197a0588abd8a46d996bb333ed8..036d17db68e0068752277adf0e5b56c7b526e566 100644 (file)
@@ -663,8 +663,7 @@ struct cxl_rcrb_info {
  * @rch: Indicate whether this dport was enumerated in RCH or VH mode
  * @port: reference to cxl_port that contains this downstream port
  * @regs: Dport parsed register blocks
- * @sw_coord: access coordinates (performance) for switch from CDAT
- * @hb_coord: access coordinates (performance) from ACPI generic port (host bridge)
+ * @coord: access coordinates (bandwidth and latency performance attributes)
  * @link_latency: calculated PCIe downstream latency
  */
 struct cxl_dport {
@@ -675,8 +674,7 @@ struct cxl_dport {
        bool rch;
        struct cxl_port *port;
        struct cxl_regs regs;
-       struct access_coordinate sw_coord;
-       struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX];
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
        long link_latency;
 };
 
@@ -884,8 +882,6 @@ void cxl_switch_parse_cdat(struct cxl_port *port);
 
 int cxl_endpoint_get_perf_coordinates(struct cxl_port *port,
                                      struct access_coordinate *coord);
-int cxl_hb_get_perf_coordinates(struct cxl_port *port,
-                               struct access_coordinate *coord);
 void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
                                    struct cxl_endpoint_decoder *cxled);
 
index 20fb3b35e89e0473ee8ad42dcd17407086fb8cdb..36cee9c30cebd20488ec5afd216187ef82497e54 100644 (file)
@@ -401,7 +401,7 @@ enum cxl_devtype {
  */
 struct cxl_dpa_perf {
        struct range dpa_range;
-       struct access_coordinate coord;
+       struct access_coordinate coord[ACCESS_COORDINATE_MAX];
        int qos_class;
 };
 
index f2556a8e940156bc4f9d34ae5dc92aac837b688a..9bc2e10381afd9cc6f97d6dd50510c8daa092b5b 100644 (file)
@@ -790,7 +790,7 @@ static void ffa_notification_info_get(void)
 
                        part_id = packed_id_list[ids_processed++];
 
-                       if (!ids_count[list]) { /* Global Notification */
+                       if (ids_count[list] == 1) { /* Global Notification */
                                __do_sched_recv_cb(part_id, 0, false);
                                continue;
                        }
index ea9201e7044cbdbfea4d12bb5ac2390330c5d911..1fa79bba492e880fea5af80a038eddf4cce7c003 100644 (file)
@@ -736,7 +736,7 @@ static void scmi_powercap_domain_init_fc(const struct scmi_protocol_handle *ph,
        ph->hops->fastchannel_init(ph, POWERCAP_DESCRIBE_FASTCHANNEL,
                                   POWERCAP_PAI_GET, 4, domain,
                                   &fc[POWERCAP_FC_PAI].get_addr, NULL,
-                                  &fc[POWERCAP_PAI_GET].rate_limit);
+                                  &fc[POWERCAP_FC_PAI].rate_limit);
 
        *p_fc = fc;
 }
index 350573518503355f6abaa4d24cbcac6368e8930c..130d13e9cd6beb93498469fae489b05e5ba1dfab 100644 (file)
@@ -921,7 +921,7 @@ static int scmi_dbg_raw_mode_open(struct inode *inode, struct file *filp)
        rd->raw = raw;
        filp->private_data = rd;
 
-       return 0;
+       return nonseekable_open(inode, filp);
 }
 
 static int scmi_dbg_raw_mode_release(struct inode *inode, struct file *filp)
@@ -950,6 +950,7 @@ static const struct file_operations scmi_dbg_raw_mode_reset_fops = {
        .open = scmi_dbg_raw_mode_open,
        .release = scmi_dbg_raw_mode_release,
        .write = scmi_dbg_raw_mode_reset_write,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
@@ -959,6 +960,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_fops = {
        .read = scmi_dbg_raw_mode_message_read,
        .write = scmi_dbg_raw_mode_message_write,
        .poll = scmi_dbg_raw_mode_message_poll,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
@@ -975,6 +977,7 @@ static const struct file_operations scmi_dbg_raw_mode_message_async_fops = {
        .read = scmi_dbg_raw_mode_message_read,
        .write = scmi_dbg_raw_mode_message_async_write,
        .poll = scmi_dbg_raw_mode_message_poll,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
@@ -998,6 +1001,7 @@ static const struct file_operations scmi_dbg_raw_mode_notification_fops = {
        .release = scmi_dbg_raw_mode_release,
        .read = scmi_test_dbg_raw_mode_notif_read,
        .poll = scmi_test_dbg_raw_mode_notif_poll,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
@@ -1021,6 +1025,7 @@ static const struct file_operations scmi_dbg_raw_mode_errors_fops = {
        .release = scmi_dbg_raw_mode_release,
        .read = scmi_test_dbg_raw_mode_errors_read,
        .poll = scmi_test_dbg_raw_mode_errors_poll,
+       .llseek = no_llseek,
        .owner = THIS_MODULE,
 };
 
index 1ee62cd58582b6496f0536fa7c45e2dc0305797f..25db014494a4de9bb8c44d0b2bd39d8786c3bb59 100644 (file)
@@ -92,7 +92,7 @@ static inline int to_reg(int gpio, enum ctrl_register reg_type)
                case 0x5e:
                        return GPIOPANELCTL;
                default:
-                       return -EOPNOTSUPP;
+                       return -ENOTSUPP;
                }
        }
 
index 5ef8af8249806aa6c1b226ed4ab9219cca91d936..c097e310c9e841044a3ef214444170721d116537 100644 (file)
@@ -529,6 +529,7 @@ static const struct of_device_id lpc32xx_gpio_of_match[] = {
        { .compatible = "nxp,lpc3220-gpio", },
        { },
 };
+MODULE_DEVICE_TABLE(of, lpc32xx_gpio_of_match);
 
 static struct platform_driver lpc32xx_gpio_driver = {
        .driver         = {
index c18b6b47384f1b8b9a3a26c3ac7c5f125e82d365..94ca9d03c0949453abf3ad82e013698a7a97ffda 100644 (file)
@@ -104,7 +104,7 @@ static inline int to_reg(int gpio, enum ctrl_register type)
        unsigned int reg = type == CTRL_IN ? GPIO_IN_CTRL_BASE : GPIO_OUT_CTRL_BASE;
 
        if (gpio >= WCOVE_GPIO_NUM)
-               return -EOPNOTSUPP;
+               return -ENOTSUPP;
 
        return reg + gpio;
 }
index 9c62552bec344e370996a028d809934e4a6f4420..b3b84647207ed47463e004e2c72745c6120857d1 100644 (file)
@@ -210,6 +210,7 @@ extern int amdgpu_async_gfx_ring;
 extern int amdgpu_mcbp;
 extern int amdgpu_discovery;
 extern int amdgpu_mes;
+extern int amdgpu_mes_log_enable;
 extern int amdgpu_mes_kiq;
 extern int amdgpu_noretry;
 extern int amdgpu_force_asic_type;
index 0a4b09709cfb149078c6284f2a0908cbde928430..ec888fc6ead8df0ce52ec00439e5f22ca7f4e9ff 100644 (file)
@@ -819,7 +819,7 @@ retry:
 
        p->bytes_moved += ctx.bytes_moved;
        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-           amdgpu_bo_in_cpu_visible_vram(bo))
+           amdgpu_res_cpu_visible(adev, bo->tbo.resource))
                p->bytes_moved_vis += ctx.bytes_moved;
 
        if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
index aa16d51dd8421b38a0a34fcf89263ffbf08af4fd..7753a2e64d4114a280afc99beb341f4af8f4ffac 100644 (file)
@@ -4135,18 +4135,22 @@ int amdgpu_device_init(struct amdgpu_device *adev,
                                        adev->ip_blocks[i].status.hw = true;
                                }
                        }
+               } else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) &&
+                                  !amdgpu_device_has_display_hardware(adev)) {
+                                       r = psp_gpu_reset(adev);
                } else {
-                       tmp = amdgpu_reset_method;
-                       /* It should do a default reset when loading or reloading the driver,
-                        * regardless of the module parameter reset_method.
-                        */
-                       amdgpu_reset_method = AMD_RESET_METHOD_NONE;
-                       r = amdgpu_asic_reset(adev);
-                       amdgpu_reset_method = tmp;
-                       if (r) {
-                               dev_err(adev->dev, "asic reset on init failed\n");
-                               goto failed;
-                       }
+                               tmp = amdgpu_reset_method;
+                               /* It should do a default reset when loading or reloading the driver,
+                                * regardless of the module parameter reset_method.
+                                */
+                               amdgpu_reset_method = AMD_RESET_METHOD_NONE;
+                               r = amdgpu_asic_reset(adev);
+                               amdgpu_reset_method = tmp;
+               }
+
+               if (r) {
+                 dev_err(adev->dev, "asic reset on init failed\n");
+                 goto failed;
                }
        }
 
index fdd36fb027ab6aa04b31c790af80596bb7da0427..ac5bf01fe8d2a9e9741d00981683b0e32b02f4eb 100644 (file)
@@ -1896,6 +1896,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
                amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
                break;
        case IP_VERSION(14, 0, 0):
+       case IP_VERSION(14, 0, 1):
                amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block);
                break;
        default:
index 80b9642f2bc4f25c69e9f30c70138f073e0c6cd2..e4277298cf1aad3518025b898162ea6224e874de 100644 (file)
@@ -195,6 +195,7 @@ int amdgpu_async_gfx_ring = 1;
 int amdgpu_mcbp = -1;
 int amdgpu_discovery = -1;
 int amdgpu_mes;
+int amdgpu_mes_log_enable = 0;
 int amdgpu_mes_kiq;
 int amdgpu_noretry = -1;
 int amdgpu_force_asic_type = -1;
@@ -667,6 +668,15 @@ MODULE_PARM_DESC(mes,
        "Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
 module_param_named(mes, amdgpu_mes, int, 0444);
 
+/**
+ * DOC: mes_log_enable (int)
+ * Enable Micro Engine Scheduler log. This is used to enable/disable MES internal log.
+ * (0 = disabled (default), 1 = enabled)
+ */
+MODULE_PARM_DESC(mes_log_enable,
+       "Enable Micro Engine Scheduler log (0 = disabled (default), 1 = enabled)");
+module_param_named(mes_log_enable, amdgpu_mes_log_enable, int, 0444);
+
 /**
  * DOC: mes_kiq (int)
  * Enable Micro Engine Scheduler KIQ. This is a new engine pipe for kiq.
index 4b3000c21ef2c59cba09ca39e3dc5421208049d5..e4742b65032d1dce16db69ea086c86dd4895e610 100644 (file)
@@ -304,12 +304,15 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
                dma_fence_set_error(finished, -ECANCELED);
 
        if (finished->error < 0) {
-               DRM_INFO("Skip scheduling IBs!\n");
+               dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
+                       ring->name);
        } else {
                r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
                                       &fence);
                if (r)
-                       DRM_ERROR("Error scheduling IBs (%d)\n", r);
+                       dev_err(adev->dev,
+                               "Error scheduling IBs (%d) in ring(%s)", r,
+                               ring->name);
        }
 
        job->job_run_counter++;
index a98e03e0a51f1f741895d253f896e76de29f9aec..a00cf4756ad0e2f371742e760183882773a80243 100644 (file)
@@ -102,7 +102,10 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev)
 {
        int r;
 
-       r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+       if (!amdgpu_mes_log_enable)
+               return 0;
+
+       r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_GTT,
                                    &adev->mes.event_log_gpu_obj,
                                    &adev->mes.event_log_gpu_addr,
@@ -1549,12 +1552,11 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused)
        uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr);
 
        seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4,
-                    mem, PAGE_SIZE, false);
+                    mem, AMDGPU_MES_LOG_BUFFER_SIZE, false);
 
        return 0;
 }
 
-
 DEFINE_SHOW_ATTRIBUTE(amdgpu_debugfs_mes_event_log);
 
 #endif
@@ -1565,7 +1567,7 @@ void amdgpu_debugfs_mes_event_log_init(struct amdgpu_device *adev)
 #if defined(CONFIG_DEBUG_FS)
        struct drm_minor *minor = adev_to_drm(adev)->primary;
        struct dentry *root = minor->debugfs_root;
-       if (adev->enable_mes)
+       if (adev->enable_mes && amdgpu_mes_log_enable)
                debugfs_create_file("amdgpu_mes_event_log", 0444, root,
                                    adev, &amdgpu_debugfs_mes_event_log_fops);
 
index 7d4f93fea937ae1d82ebd95af9cad8dc71586034..4c8fc3117ef8948627ef6a83cb7f603de2991662 100644 (file)
@@ -52,6 +52,7 @@ enum amdgpu_mes_priority_level {
 
 #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */
 #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */
+#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */
 
 struct amdgpu_mes_funcs;
 
index 010b0cb7693c9c3be5608f192cb19e583a285893..2099159a693fa02e7c508c3aecdc9f695498cecd 100644 (file)
@@ -617,8 +617,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
                return r;
 
        if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
-           bo->tbo.resource->mem_type == TTM_PL_VRAM &&
-           amdgpu_bo_in_cpu_visible_vram(bo))
+           amdgpu_res_cpu_visible(adev, bo->tbo.resource))
                amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
                                             ctx.bytes_moved);
        else
@@ -1272,23 +1271,25 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
 void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
                          struct amdgpu_mem_stats *stats)
 {
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+       struct ttm_resource *res = bo->tbo.resource;
        uint64_t size = amdgpu_bo_size(bo);
        struct drm_gem_object *obj;
        unsigned int domain;
        bool shared;
 
        /* Abort if the BO doesn't currently have a backing store */
-       if (!bo->tbo.resource)
+       if (!res)
                return;
 
        obj = &bo->tbo.base;
        shared = drm_gem_object_is_shared_for_memory_stats(obj);
 
-       domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
+       domain = amdgpu_mem_type_to_domain(res->mem_type);
        switch (domain) {
        case AMDGPU_GEM_DOMAIN_VRAM:
                stats->vram += size;
-               if (amdgpu_bo_in_cpu_visible_vram(bo))
+               if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
                        stats->visible_vram += size;
                if (shared)
                        stats->vram_shared += size;
@@ -1389,10 +1390,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
        /* Remember that this BO was accessed by the CPU */
        abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
 
-       if (bo->resource->mem_type != TTM_PL_VRAM)
-               return 0;
-
-       if (amdgpu_bo_in_cpu_visible_vram(abo))
+       if (amdgpu_res_cpu_visible(adev, bo->resource))
                return 0;
 
        /* Can't move a pinned BO to visible VRAM */
@@ -1415,7 +1413,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 
        /* this should never happen */
        if (bo->resource->mem_type == TTM_PL_VRAM &&
-           !amdgpu_bo_in_cpu_visible_vram(abo))
+           !amdgpu_res_cpu_visible(adev, bo->resource))
                return VM_FAULT_SIGBUS;
 
        ttm_bo_move_to_lru_tail_unlocked(bo);
@@ -1579,6 +1577,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
  */
 u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
 {
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
        struct dma_buf_attachment *attachment;
        struct dma_buf *dma_buf;
        const char *placement;
@@ -1587,10 +1586,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
 
        if (dma_resv_trylock(bo->tbo.base.resv)) {
                unsigned int domain;
+
                domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
                switch (domain) {
                case AMDGPU_GEM_DOMAIN_VRAM:
-                       if (amdgpu_bo_in_cpu_visible_vram(bo))
+                       if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
                                placement = "VRAM VISIBLE";
                        else
                                placement = "VRAM";
index be679c42b0b8cb5d127910803e79593910c72952..fa03d9e4874cc65b39e038014ab15fc4e58ba858 100644 (file)
@@ -250,28 +250,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
        return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
 }
 
-/**
- * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
- */
-static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
-{
-       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-       struct amdgpu_res_cursor cursor;
-
-       if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM)
-               return false;
-
-       amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
-       while (cursor.remaining) {
-               if (cursor.start < adev->gmc.visible_vram_size)
-                       return true;
-
-               amdgpu_res_next(&cursor, cursor.size);
-       }
-
-       return false;
-}
-
 /**
  * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
  */
index fc418e670fdae27b699bdbefce8051ab128ab76c..1d71729e3f6bcef2c02f9e1ce252dc6cd6461b94 100644 (file)
@@ -133,7 +133,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
 
                } else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
                           !(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
-                          amdgpu_bo_in_cpu_visible_vram(abo)) {
+                          amdgpu_res_cpu_visible(adev, bo->resource)) {
 
                        /* Try evicting to the CPU inaccessible part of VRAM
                         * first, but only set GTT as busy placement, so this
@@ -403,40 +403,55 @@ error:
        return r;
 }
 
-/*
- * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
+/**
+ * amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
+ * @adev: amdgpu device
+ * @res: the resource to check
  *
- * Called by amdgpu_bo_move()
+ * Returns: true if the full resource is CPU visible, false otherwise.
  */
-static bool amdgpu_mem_visible(struct amdgpu_device *adev,
-                              struct ttm_resource *mem)
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+                           struct ttm_resource *res)
 {
-       u64 mem_size = (u64)mem->size;
        struct amdgpu_res_cursor cursor;
-       u64 end;
 
-       if (mem->mem_type == TTM_PL_SYSTEM ||
-           mem->mem_type == TTM_PL_TT)
+       if (!res)
+               return false;
+
+       if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
+           res->mem_type == AMDGPU_PL_PREEMPT)
                return true;
-       if (mem->mem_type != TTM_PL_VRAM)
+
+       if (res->mem_type != TTM_PL_VRAM)
                return false;
 
-       amdgpu_res_first(mem, 0, mem_size, &cursor);
-       end = cursor.start + cursor.size;
+       amdgpu_res_first(res, 0, res->size, &cursor);
        while (cursor.remaining) {
+               if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size)
+                       return false;
                amdgpu_res_next(&cursor, cursor.size);
+       }
 
-               if (!cursor.remaining)
-                       break;
+       return true;
+}
 
-               /* ttm_resource_ioremap only supports contiguous memory */
-               if (end != cursor.start)
-                       return false;
+/*
+ * amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
+ *
+ * Called by amdgpu_bo_move()
+ */
+static bool amdgpu_res_copyable(struct amdgpu_device *adev,
+                               struct ttm_resource *mem)
+{
+       if (!amdgpu_res_cpu_visible(adev, mem))
+               return false;
 
-               end = cursor.start + cursor.size;
-       }
+       /* ttm_resource_ioremap only supports contiguous memory */
+       if (mem->mem_type == TTM_PL_VRAM &&
+           !(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
+               return false;
 
-       return end <= adev->gmc.visible_vram_size;
+       return true;
 }
 
 /*
@@ -529,8 +544,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
 
        if (r) {
                /* Check that all memory is CPU accessible */
-               if (!amdgpu_mem_visible(adev, old_mem) ||
-                   !amdgpu_mem_visible(adev, new_mem)) {
+               if (!amdgpu_res_copyable(adev, old_mem) ||
+                   !amdgpu_res_copyable(adev, new_mem)) {
                        pr_err("Move buffer fallback to memcpy unavailable\n");
                        return r;
                }
@@ -557,7 +572,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
                                     struct ttm_resource *mem)
 {
        struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
-       size_t bus_size = (size_t)mem->size;
 
        switch (mem->mem_type) {
        case TTM_PL_SYSTEM:
@@ -568,9 +582,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,
                break;
        case TTM_PL_VRAM:
                mem->bus.offset = mem->start << PAGE_SHIFT;
-               /* check if it's visible */
-               if ((mem->bus.offset + bus_size) > adev->gmc.visible_vram_size)
-                       return -EINVAL;
 
                if (adev->mman.aper_base_kaddr &&
                    mem->placement & TTM_PL_FLAG_CONTIGUOUS)
index 65ec82141a8e012e8ba42b0bb627f1a4f504c465..32cf6b6f6efd96873c294648714f2c78f6ff9ec3 100644 (file)
@@ -139,6 +139,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
 int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
                                      uint64_t start);
 
+bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
+                           struct ttm_resource *res);
+
 int amdgpu_ttm_init(struct amdgpu_device *adev);
 void amdgpu_ttm_fini(struct amdgpu_device *adev);
 void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
index 4299ce386322e7cea27232ae05a1222f62f5a850..94089069c9ada61aa61b7c2b28601b764d47c172 100644 (file)
@@ -1613,6 +1613,37 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
        trace_amdgpu_vm_bo_map(bo_va, mapping);
 }
 
+/* Validate operation parameters to prevent potential abuse */
+static int amdgpu_vm_verify_parameters(struct amdgpu_device *adev,
+                                         struct amdgpu_bo *bo,
+                                         uint64_t saddr,
+                                         uint64_t offset,
+                                         uint64_t size)
+{
+       uint64_t tmp, lpfn;
+
+       if (saddr & AMDGPU_GPU_PAGE_MASK
+           || offset & AMDGPU_GPU_PAGE_MASK
+           || size & AMDGPU_GPU_PAGE_MASK)
+               return -EINVAL;
+
+       if (check_add_overflow(saddr, size, &tmp)
+           || check_add_overflow(offset, size, &tmp)
+           || size == 0 /* which also leads to end < begin */)
+               return -EINVAL;
+
+       /* make sure object fit at this offset */
+       if (bo && offset + size > amdgpu_bo_size(bo))
+               return -EINVAL;
+
+       /* Ensure last pfn not exceed max_pfn */
+       lpfn = (saddr + size - 1) >> AMDGPU_GPU_PAGE_SHIFT;
+       if (lpfn >= adev->vm_manager.max_pfn)
+               return -EINVAL;
+
+       return 0;
+}
+
 /**
  * amdgpu_vm_bo_map - map bo inside a vm
  *
@@ -1639,21 +1670,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
        struct amdgpu_bo *bo = bo_va->base.bo;
        struct amdgpu_vm *vm = bo_va->base.vm;
        uint64_t eaddr;
+       int r;
 
-       /* validate the parameters */
-       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
-               return -EINVAL;
-       if (saddr + size <= saddr || offset + size <= offset)
-               return -EINVAL;
-
-       /* make sure object fit at this offset */
-       eaddr = saddr + size - 1;
-       if ((bo && offset + size > amdgpu_bo_size(bo)) ||
-           (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
-               return -EINVAL;
+       r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+       if (r)
+               return r;
 
        saddr /= AMDGPU_GPU_PAGE_SIZE;
-       eaddr /= AMDGPU_GPU_PAGE_SIZE;
+       eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
 
        tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
        if (tmp) {
@@ -1706,17 +1730,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
        uint64_t eaddr;
        int r;
 
-       /* validate the parameters */
-       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK || size & ~PAGE_MASK)
-               return -EINVAL;
-       if (saddr + size <= saddr || offset + size <= offset)
-               return -EINVAL;
-
-       /* make sure object fit at this offset */
-       eaddr = saddr + size - 1;
-       if ((bo && offset + size > amdgpu_bo_size(bo)) ||
-           (eaddr >= adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT))
-               return -EINVAL;
+       r = amdgpu_vm_verify_parameters(adev, bo, saddr, offset, size);
+       if (r)
+               return r;
 
        /* Allocate all the needed memory */
        mapping = kmalloc(sizeof(*mapping), GFP_KERNEL);
@@ -1730,7 +1746,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
        }
 
        saddr /= AMDGPU_GPU_PAGE_SIZE;
-       eaddr /= AMDGPU_GPU_PAGE_SIZE;
+       eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
 
        mapping->start = saddr;
        mapping->last = eaddr;
@@ -1817,10 +1833,14 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
        struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
        LIST_HEAD(removed);
        uint64_t eaddr;
+       int r;
+
+       r = amdgpu_vm_verify_parameters(adev, NULL, saddr, 0, size);
+       if (r)
+               return r;
 
-       eaddr = saddr + size - 1;
        saddr /= AMDGPU_GPU_PAGE_SIZE;
-       eaddr /= AMDGPU_GPU_PAGE_SIZE;
+       eaddr = saddr + (size - 1) / AMDGPU_GPU_PAGE_SIZE;
 
        /* Allocate all the needed memory */
        before = kzalloc(sizeof(*before), GFP_KERNEL);
index d6f808acfb17b79d98664d0fedaa95d8e29a4270..fbb43ae7624f44ebd13ddbe5a78865ea2dba10ab 100644 (file)
@@ -62,6 +62,11 @@ void aqua_vanjaram_doorbell_index_init(struct amdgpu_device *adev)
        adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_LAYOUT1_MAX_ASSIGNMENT << 1;
 }
 
+static bool aqua_vanjaram_xcp_vcn_shared(struct amdgpu_device *adev)
+{
+       return (adev->xcp_mgr->num_xcps > adev->vcn.num_vcn_inst);
+}
+
 static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
                             uint32_t inst_idx, struct amdgpu_ring *ring)
 {
@@ -87,7 +92,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
        case AMDGPU_RING_TYPE_VCN_ENC:
        case AMDGPU_RING_TYPE_VCN_JPEG:
                ip_blk = AMDGPU_XCP_VCN;
-               if (adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+               if (aqua_vanjaram_xcp_vcn_shared(adev))
                        inst_mask = 1 << (inst_idx * 2);
                break;
        default:
@@ -140,10 +145,12 @@ static int aqua_vanjaram_xcp_sched_list_update(
 
                aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id);
 
-               /* VCN is shared by two partitions under CPX MODE */
+               /* VCN may be shared by two partitions under CPX MODE in certain
+                * configs.
+                */
                if ((ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
-                       ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
-                       adev->xcp_mgr->mode == AMDGPU_CPX_PARTITION_MODE)
+                    ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) &&
+                   aqua_vanjaram_xcp_vcn_shared(adev))
                        aqua_vanjaram_xcp_gpu_sched_update(adev, ring, ring->xcp_id + 1);
        }
 
index 1770e496c1b7ce21198fdb80d3051c4c961e9b5f..f7325b02a191f726196d4ad0ac6fa3d090ab9977 100644 (file)
@@ -1635,7 +1635,7 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev)
                        active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa));
        }
 
-       active_rb_bitmap |= global_active_rb_bitmap;
+       active_rb_bitmap &= global_active_rb_bitmap;
        adev->gfx.config.backend_enable_mask = active_rb_bitmap;
        adev->gfx.config.num_rbs = hweight32(active_rb_bitmap);
 }
@@ -5465,6 +5465,7 @@ static void gfx_v11_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
        /* Make sure that we can't skip the SET_Q_MODE packets when the VM
         * changed in any way.
         */
+       ring->set_q_mode_offs = 0;
        ring->set_q_mode_ptr = NULL;
 }
 
index 072c478665ade1a838f810bfadc10b32bf44a5eb..63f281a9984d986961d70511c83b6e65272979b7 100644 (file)
@@ -411,8 +411,11 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
        mes_set_hw_res_pkt.enable_reg_active_poll = 1;
        mes_set_hw_res_pkt.enable_level_process_quantum_check = 1;
        mes_set_hw_res_pkt.oversubscription_timer = 50;
-       mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
-       mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr;
+       if (amdgpu_mes_log_enable) {
+               mes_set_hw_res_pkt.enable_mes_event_int_logging = 1;
+               mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr =
+                                       mes->event_log_gpu_addr;
+       }
 
        return mes_v11_0_submit_pkt_and_poll_completion(mes,
                        &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
index 34237a1b1f2e45c40989c2070bdc0ae071ee0c4b..82eab49be82bb99807e5caabf5079b32dfd4cb26 100644 (file)
@@ -1602,19 +1602,9 @@ static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
        u32 sdma_cntl;
 
        sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
-       switch (state) {
-       case AMDGPU_IRQ_STATE_DISABLE:
-               sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL,
-                                         DRAM_ECC_INT_ENABLE, 0);
-               WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
-               break;
-       /* sdma ecc interrupt is enabled by default
-        * driver doesn't need to do anything to
-        * enable the interrupt */
-       case AMDGPU_IRQ_STATE_ENABLE:
-       default:
-               break;
-       }
+       sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, DRAM_ECC_INT_ENABLE,
+                                       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
+       WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);
 
        return 0;
 }
index 581a3bd11481cc8d44a4f22188551d5a2803cff5..43ca63fe85ac3b0f9236a27766f3a78f42c2fbfb 100644 (file)
@@ -457,10 +457,8 @@ static bool soc21_need_full_reset(struct amdgpu_device *adev)
 {
        switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
        case IP_VERSION(11, 0, 0):
-               return amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC);
        case IP_VERSION(11, 0, 2):
        case IP_VERSION(11, 0, 3):
-               return false;
        default:
                return true;
        }
@@ -722,7 +720,10 @@ static int soc21_common_early_init(void *handle)
                        AMD_PG_SUPPORT_VCN |
                        AMD_PG_SUPPORT_JPEG |
                        AMD_PG_SUPPORT_GFX_PG;
-               adev->external_rev_id = adev->rev_id + 0x1;
+               if (adev->rev_id == 0)
+                       adev->external_rev_id = 0x1;
+               else
+                       adev->external_rev_id = adev->rev_id + 0x10;
                break;
        case IP_VERSION(11, 5, 1):
                adev->cg_flags =
@@ -869,10 +870,35 @@ static int soc21_common_suspend(void *handle)
        return soc21_common_hw_fini(adev);
 }
 
+static bool soc21_need_reset_on_resume(struct amdgpu_device *adev)
+{
+       u32 sol_reg1, sol_reg2;
+
+       /* Will reset for the following suspend abort cases.
+        * 1) Only reset dGPU side.
+        * 2) S3 suspend got aborted and TOS is active.
+        */
+       if (!(adev->flags & AMD_IS_APU) && adev->in_s3 &&
+           !adev->suspend_complete) {
+               sol_reg1 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+               msleep(100);
+               sol_reg2 = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_81);
+
+               return (sol_reg1 != sol_reg2);
+       }
+
+       return false;
+}
+
 static int soc21_common_resume(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+       if (soc21_need_reset_on_resume(adev)) {
+               dev_info(adev->dev, "S3 suspend aborted, resetting...");
+               soc21_asic_reset(adev);
+       }
+
        return soc21_common_hw_init(adev);
 }
 
index 84368cf1e17535c16c031ce6677f53769f9e8f94..bd57896ab85d565770bd75484b5443de9891d601 100644 (file)
@@ -225,6 +225,8 @@ static int umsch_mm_v4_0_ring_start(struct amdgpu_umsch_mm *umsch)
 
        WREG32_SOC15(VCN, 0, regVCN_UMSCH_RB_SIZE, ring->ring_size);
 
+       ring->wptr = 0;
+
        data = RREG32_SOC15(VCN, 0, regVCN_RB_ENABLE);
        data &= ~(VCN_RB_ENABLE__AUDIO_RB_EN_MASK);
        WREG32_SOC15(VCN, 0, regVCN_RB_ENABLE, data);
index f9631f4b1a02ca5121d7b382fe128c47c7718ec5..55aa74cbc5325e23451aa255dd5ce016e0aa4df8 100644 (file)
@@ -779,8 +779,8 @@ static int kfd_ioctl_get_process_apertures_new(struct file *filp,
         * nodes, but not more than args->num_of_nodes as that is
         * the amount of memory allocated by user
         */
-       pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
-                               args->num_of_nodes), GFP_KERNEL);
+       pa = kcalloc(args->num_of_nodes, sizeof(struct kfd_process_device_apertures),
+                    GFP_KERNEL);
        if (!pa)
                return -ENOMEM;
 
index 041ec3de55e72f24a6cce44e8a75682bf3381531..719d6d365e15016abca596bb7d9d1994b6e54996 100644 (file)
@@ -960,7 +960,6 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 {
        struct kfd_node *node;
        int i;
-       int count;
 
        if (!kfd->init_complete)
                return;
@@ -968,12 +967,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
        /* for runtime suspend, skip locking kfd */
        if (!run_pm) {
                mutex_lock(&kfd_processes_mutex);
-               count = ++kfd_locked;
-               mutex_unlock(&kfd_processes_mutex);
-
                /* For first KFD device suspend all the KFD processes */
-               if (count == 1)
+               if (++kfd_locked == 1)
                        kfd_suspend_all_processes();
+               mutex_unlock(&kfd_processes_mutex);
        }
 
        for (i = 0; i < kfd->num_nodes; i++) {
@@ -984,7 +981,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
 
 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 {
-       int ret, count, i;
+       int ret, i;
 
        if (!kfd->init_complete)
                return 0;
@@ -998,12 +995,10 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
        /* for runtime resume, skip unlocking kfd */
        if (!run_pm) {
                mutex_lock(&kfd_processes_mutex);
-               count = --kfd_locked;
-               mutex_unlock(&kfd_processes_mutex);
-
-               WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
-               if (count == 0)
+               if (--kfd_locked == 0)
                        ret = kfd_resume_all_processes();
+               WARN_ONCE(kfd_locked < 0, "KFD suspend / resume ref. error");
+               mutex_unlock(&kfd_processes_mutex);
        }
 
        return ret;
index f4d395e38683db7c85f3a7f5fc922e93b1222f88..0b655555e1678643fb84fa8b3e1640cd35a9a74e 100644 (file)
@@ -2001,6 +2001,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
                dev_err(dev, "HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
                while (halt_if_hws_hang)
                        schedule();
+               kfd_hws_hang(dqm);
                return -ETIME;
        }
 
index 717a60d7a4ea953b8dfc369b09d855ad74b49659..b79986412cd839bc89741a0b3bc1986daa2b10e4 100644 (file)
@@ -819,9 +819,9 @@ struct kfd_process *kfd_create_process(struct task_struct *thread)
        mutex_lock(&kfd_processes_mutex);
 
        if (kfd_is_locked()) {
-               mutex_unlock(&kfd_processes_mutex);
                pr_debug("KFD is locked! Cannot create process");
-               return ERR_PTR(-EINVAL);
+               process = ERR_PTR(-EINVAL);
+               goto out;
        }
 
        /* A prior open of /dev/kfd could have already created the process. */
index 71d2d44681b218fc5146f3354464ba0f9c08610e..6d2f60c61decc36711953fa5b0dd67888c652a32 100644 (file)
@@ -148,6 +148,9 @@ MODULE_FIRMWARE(FIRMWARE_NAVI12_DMCU);
 #define FIRMWARE_DCN_35_DMUB "amdgpu/dcn_3_5_dmcub.bin"
 MODULE_FIRMWARE(FIRMWARE_DCN_35_DMUB);
 
+#define FIRMWARE_DCN_351_DMUB "amdgpu/dcn_3_5_1_dmcub.bin"
+MODULE_FIRMWARE(FIRMWARE_DCN_351_DMUB);
+
 /* Number of bytes in PSP header for firmware. */
 #define PSP_HEADER_BYTES 0x100
 
@@ -3044,6 +3047,10 @@ static int dm_resume(void *handle)
        /* Do mst topology probing after resuming cached state*/
        drm_connector_list_iter_begin(ddev, &iter);
        drm_for_each_connector_iter(connector, &iter) {
+
+               if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+                       continue;
+
                aconnector = to_amdgpu_dm_connector(connector);
                if (aconnector->dc_link->type != dc_connection_mst_branch ||
                    aconnector->mst_root)
@@ -4820,9 +4827,11 @@ static int dm_init_microcode(struct amdgpu_device *adev)
                fw_name_dmub = FIRMWARE_DCN_V3_2_1_DMCUB;
                break;
        case IP_VERSION(3, 5, 0):
-       case IP_VERSION(3, 5, 1):
                fw_name_dmub = FIRMWARE_DCN_35_DMUB;
                break;
+       case IP_VERSION(3, 5, 1):
+               fw_name_dmub = FIRMWARE_DCN_351_DMUB;
+               break;
        default:
                /* ASIC doesn't support DMUB. */
                return 0;
@@ -5921,6 +5930,9 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
                &aconnector->base.probed_modes :
                &aconnector->base.modes;
 
+       if (aconnector->base.connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
+               return NULL;
+
        if (aconnector->freesync_vid_base.clock != 0)
                return &aconnector->freesync_vid_base;
 
@@ -6306,19 +6318,16 @@ create_stream_for_sink(struct drm_connector *connector,
        if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A)
                mod_build_hf_vsif_infopacket(stream, &stream->vsp_infopacket);
 
-       if (stream->link->psr_settings.psr_feature_enabled || stream->link->replay_settings.replay_feature_enabled) {
+       if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT ||
+           stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST ||
+           stream->signal == SIGNAL_TYPE_EDP) {
                //
                // should decide stream support vsc sdp colorimetry capability
                // before building vsc info packet
                //
-               stream->use_vsc_sdp_for_colorimetry = false;
-               if (aconnector->dc_sink->sink_signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
-                       stream->use_vsc_sdp_for_colorimetry =
-                               aconnector->dc_sink->is_vsc_sdp_colorimetry_supported;
-               } else {
-                       if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
-                               stream->use_vsc_sdp_for_colorimetry = true;
-               }
+               stream->use_vsc_sdp_for_colorimetry = stream->link->dpcd_caps.dpcd_rev.raw >= 0x14 &&
+                                                     stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED;
+
                if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
                        tf = TRANSFER_FUNC_GAMMA_22;
                mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
@@ -8762,10 +8771,10 @@ static void amdgpu_dm_commit_audio(struct drm_device *dev,
                if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
                        continue;
 
+notify:
                if (connector->connector_type == DRM_MODE_CONNECTOR_WRITEBACK)
                        continue;
 
-notify:
                aconnector = to_amdgpu_dm_connector(connector);
 
                mutex_lock(&adev->dm.audio_lock);
index 16e72d623630caa74e22bcb6052b162c3ff8f6c6..08c494a7a21bad10929eb3f367a465349bc9ae5c 100644 (file)
@@ -76,10 +76,8 @@ static int amdgpu_dm_wb_encoder_atomic_check(struct drm_encoder *encoder,
 
 static int amdgpu_dm_wb_connector_get_modes(struct drm_connector *connector)
 {
-       struct drm_device *dev = connector->dev;
-
-       return drm_add_modes_noedid(connector, dev->mode_config.max_width,
-                                   dev->mode_config.max_height);
+       /* Maximum resolution supported by DWB */
+       return drm_add_modes_noedid(connector, 3840, 2160);
 }
 
 static int amdgpu_dm_wb_prepare_job(struct drm_writeback_connector *wb_connector,
index 12f3e8aa46d8dfae21b5dd1e9f4ef167ee314f2d..6ad4f4efec5dd3e684428a0fb5b3c7b4a5234075 100644 (file)
@@ -99,20 +99,25 @@ static int dcn316_get_active_display_cnt_wa(
        return display_count;
 }
 
-static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context, bool disable)
+static void dcn316_disable_otg_wa(struct clk_mgr *clk_mgr_base, struct dc_state *context,
+               bool safe_to_lower, bool disable)
 {
        struct dc *dc = clk_mgr_base->ctx->dc;
        int i;
 
        for (i = 0; i < dc->res_pool->pipe_count; ++i) {
-               struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+               struct pipe_ctx *pipe = safe_to_lower
+                       ? &context->res_ctx.pipe_ctx[i]
+                       : &dc->current_state->res_ctx.pipe_ctx[i];
 
                if (pipe->top_pipe || pipe->prev_odm_pipe)
                        continue;
-               if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
-                                    dc_is_virtual_signal(pipe->stream->signal))) {
+               if (pipe->stream && (pipe->stream->dpms_off || dc_is_virtual_signal(pipe->stream->signal) ||
+                                    !pipe->stream->link_enc)) {
                        if (disable) {
-                               pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+                               if (pipe->stream_res.tg && pipe->stream_res.tg->funcs->immediate_disable_crtc)
+                                       pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
+
                                reset_sync_context_for_pipe(dc, context, i);
                        } else
                                pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
@@ -207,11 +212,11 @@ static void dcn316_update_clocks(struct clk_mgr *clk_mgr_base,
        }
 
        if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
-               dcn316_disable_otg_wa(clk_mgr_base, context, true);
+               dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, true);
 
                clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
                dcn316_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
-               dcn316_disable_otg_wa(clk_mgr_base, context, false);
+               dcn316_disable_otg_wa(clk_mgr_base, context, safe_to_lower, false);
 
                update_dispclk = true;
        }
index 101fe96287cb480bf9ee142ceb998a84ab1027f8..d9c5692c86c21ac15b85af1ba0cae92f4274a255 100644 (file)
 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK            0x00000007L
 #define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK            0x000F0000L
 
+#define regCLK5_0_CLK5_spll_field_8                            0x464b
+#define regCLK5_0_CLK5_spll_field_8_BASE_IDX   0
+
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT   0xd
+#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK             0x00002000L
+
 #define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
 
 #define REG(reg_name) \
@@ -411,6 +417,17 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs
 {
 }
 
+static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+{
+       struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+       struct dc_context *ctx = clk_mgr->base.ctx;
+       uint32_t ssc_enable;
+
+       REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable);
+
+       return ssc_enable == 1;
+}
+
 static void init_clk_states(struct clk_mgr *clk_mgr)
 {
        struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
@@ -428,7 +445,16 @@ static void init_clk_states(struct clk_mgr *clk_mgr)
 
 void dcn35_init_clocks(struct clk_mgr *clk_mgr)
 {
+       struct clk_mgr_internal *clk_mgr_int = TO_CLK_MGR_INTERNAL(clk_mgr);
        init_clk_states(clk_mgr);
+
+       // to adjust dp_dto reference clock if ssc is enable otherwise to apply dprefclk
+       if (dcn35_is_spll_ssc_enabled(clk_mgr))
+               clk_mgr->dp_dto_source_clock_in_khz =
+                       dce_adjust_dp_ref_freq_for_ss(clk_mgr_int, clk_mgr->dprefclk_khz);
+       else
+               clk_mgr->dp_dto_source_clock_in_khz = clk_mgr->dprefclk_khz;
+
 }
 static struct clk_bw_params dcn35_bw_params = {
        .vram_type = Ddr4MemType,
@@ -517,6 +543,28 @@ static DpmClocks_t_dcn35 dummy_clocks;
 
 static struct dcn35_watermarks dummy_wms = { 0 };
 
+static struct dcn35_ss_info_table ss_info_table = {
+       .ss_divider = 1000,
+       .ss_percentage = {0, 0, 375, 375, 375}
+};
+
+static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+{
+       struct dc_context *ctx = clk_mgr->base.ctx;
+       uint32_t clock_source;
+
+       REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+       // If it's DFS mode, clock_source is 0.
+       if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+               clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+
+               if (clk_mgr->dprefclk_ss_percentage != 0) {
+                       clk_mgr->ss_on_dprefclk = true;
+                       clk_mgr->dprefclk_ss_divider = ss_info_table.ss_divider;
+               }
+       }
+}
+
 static void dcn35_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn35_watermarks *table)
 {
        int i, num_valid_sets;
@@ -1061,6 +1109,8 @@ void dcn35_clk_mgr_construct(
        dce_clock_read_ss_info(&clk_mgr->base);
        /*when clk src is from FCH, it could have ss, same clock src as DPREF clk*/
 
+       dcn35_read_ss_info_from_lut(&clk_mgr->base);
+
        clk_mgr->base.base.bw_params = &dcn35_bw_params;
 
        if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
index 5cc7f8da209c599f7585e8f10e499ef2118f34ff..61986e5cb491967643b61832c8e35dd7a4818d41 100644 (file)
@@ -436,6 +436,15 @@ bool dc_state_add_plane(
                goto out;
        }
 
+       if (stream_status->plane_count == 0 && dc->config.enable_windowed_mpo_odm)
+               /* ODM combine could prevent us from supporting more planes
+                * we will reset ODM slice count back to 1 when all planes have
+                * been removed to maximize the amount of planes supported when
+                * new planes are added.
+                */
+               resource_update_pipes_for_stream_with_slice_count(
+                               state, dc->current_state, dc->res_pool, stream, 1);
+
        otg_master_pipe = resource_get_otg_master_for_stream(
                        &state->res_ctx, stream);
        if (otg_master_pipe)
index 970644b695cd4f1d96f166cc1786987b460cdafd..b5e0289d2fe82aed149fab851ebc1b73213406ac 100644 (file)
@@ -976,7 +976,10 @@ static bool dcn31_program_pix_clk(
        struct bp_pixel_clock_parameters bp_pc_params = {0};
        enum transmitter_color_depth bp_pc_colour_depth = TRANSMITTER_COLOR_DEPTH_24;
 
-       if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
+       // Apply ssed(spread spectrum) dpref clock for edp only.
+       if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0
+               && pix_clk_params->signal_type == SIGNAL_TYPE_EDP
+               && encoding == DP_8b_10b_ENCODING)
                dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
        // For these signal types Driver to program DP_DTO without calling VBIOS Command table
        if (dc_is_dp_signal(pix_clk_params->signal_type) || dc_is_virtual_signal(pix_clk_params->signal_type)) {
@@ -1093,9 +1096,6 @@ static bool get_pixel_clk_frequency_100hz(
        unsigned int modulo_hz = 0;
        unsigned int dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dprefclk_khz;
 
-       if (clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz != 0)
-               dp_dto_ref_khz = clock_source->ctx->dc->clk_mgr->dp_dto_source_clock_in_khz;
-
        if (clock_source->id == CLOCK_SOURCE_ID_DP_DTO) {
                clock_hz = REG_READ(PHASE[inst]);
 
index e224a028d68accaf083a76a93eb7f0cdb940aedf..8a0460e86309775e83775093b04527f022e4a91c 100644 (file)
@@ -248,14 +248,12 @@ void dcn32_link_encoder_construct(
        enc10->base.hpd_source = init_data->hpd_source;
        enc10->base.connector = init_data->connector;
 
-       enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
-
-       enc10->base.features = *enc_features;
        if (enc10->base.connector.id == CONNECTOR_ID_USBC)
                enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
-       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
-               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
+       enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
+
+       enc10->base.features = *enc_features;
 
        enc10->base.transmitter = init_data->transmitter;
 
index 81e349d5835bbed499f03ef6eb33e5210c83d64b..da94e5309fbaf0f8e06a4a1aad4ce431a8d9f2cc 100644 (file)
@@ -184,6 +184,8 @@ void dcn35_link_encoder_construct(
        enc10->base.hpd_source = init_data->hpd_source;
        enc10->base.connector = init_data->connector;
 
+       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
+               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
        enc10->base.preferred_engine = ENGINE_ID_UNKNOWN;
 
@@ -238,8 +240,6 @@ void dcn35_link_encoder_construct(
        }
 
        enc10->base.features.flags.bits.HDMI_6GB_EN = 1;
-       if (enc10->base.connector.id == CONNECTOR_ID_USBC)
-               enc10->base.features.flags.bits.DP_IS_USB_C = 1;
 
        if (bp_funcs->get_connector_speed_cap_info)
                result = bp_funcs->get_connector_speed_cap_info(enc10->base.ctx->dc_bios,
index f07a4c7e48bc23ed0d2351aef46ef38907ee265f..52eab8fccb7f16e9b1f02d541c030f5736b79ace 100644 (file)
@@ -267,9 +267,6 @@ static void optc32_setup_manual_trigger(struct timing_generator *optc)
                                OTG_V_TOTAL_MAX_SEL, 1,
                                OTG_FORCE_LOCK_ON_EVENT, 0,
                                OTG_SET_V_TOTAL_MIN_MASK, (1 << 1)); /* TRIGA */
-
-               // Setup manual flow control for EOF via TRIG_A
-               optc->funcs->setup_manual_trigger(optc);
        }
 }
 
index 246b211b1e85f74d362efac0e38384a2cafb59fc..65333141b1c1b05645f9ba374896dc3ee5a682e5 100644 (file)
@@ -735,7 +735,7 @@ static int smu_early_init(void *handle)
        smu->adev = adev;
        smu->pm_enabled = !!amdgpu_dpm;
        smu->is_apu = false;
-       smu->smu_baco.state = SMU_BACO_STATE_EXIT;
+       smu->smu_baco.state = SMU_BACO_STATE_NONE;
        smu->smu_baco.platform_support = false;
        smu->user_dpm_profile.fan_mode = -1;
 
@@ -1966,10 +1966,25 @@ static int smu_smc_hw_cleanup(struct smu_context *smu)
        return 0;
 }
 
+/*
+ * Request PP_MP1_STATE_UNLOAD from the SMU, but only when all of the
+ * following hold: the device is not in runtime PM, not suspending, not in
+ * reset, its MP1 IP version is 13.0.10, and it has no display hardware.
+ *
+ * Returns 0 when the request is skipped, otherwise the result of
+ * smu_set_mp1_state().
+ */
+static int smu_reset_mp1_state(struct smu_context *smu)
+{
+       struct amdgpu_device *adev = smu->adev;
+
+       if (adev->in_runpm || adev->in_suspend || amdgpu_in_reset(adev))
+               return 0;
+
+       if (amdgpu_ip_version(adev, MP1_HWIP, 0) != IP_VERSION(13, 0, 10))
+               return 0;
+
+       if (amdgpu_device_has_display_hardware(adev))
+               return 0;
+
+       return smu_set_mp1_state(smu, PP_MP1_STATE_UNLOAD);
+}
+
 static int smu_hw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
        struct smu_context *smu = adev->powerplay.pp_handle;
+       int ret;
 
        if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev))
                return 0;
@@ -1987,7 +2002,15 @@ static int smu_hw_fini(void *handle)
 
        adev->pm.dpm_enabled = false;
 
-       return smu_smc_hw_cleanup(smu);
+       ret = smu_smc_hw_cleanup(smu);
+       if (ret)
+               return ret;
+
+       ret = smu_reset_mp1_state(smu);
+       if (ret)
+               return ret;
+
+       return 0;
 }
 
 static void smu_late_fini(void *handle)
index a870bdd49a4e3cd4741e1fe852c7a337117451fb..1fa81575788c545a39178275ab036cbe6dcdb0c9 100644 (file)
@@ -424,6 +424,7 @@ enum smu_reset_mode {
 enum smu_baco_state {
        SMU_BACO_STATE_ENTER = 0,
        SMU_BACO_STATE_EXIT,
+       SMU_BACO_STATE_NONE,
 };
 
 struct smu_baco_context {
index 5bb7a63c0602b79012017bb9cfc7705fb581b38d..97522c0852589d63a84009a518b0af4719021ba5 100644 (file)
@@ -144,6 +144,37 @@ typedef struct {
   uint32_t MaxGfxClk;
 } DpmClocks_t;
 
+// DPM clock table layout, v14_0_1 variant.
+// Carries separate clock arrays (VClocks0/1, DClocks0/1) and separate
+// enabled-level counts (Vcn0ClkLevelsEnabled/Vcn1ClkLevelsEnabled) for the
+// two VCN instances.
+//Freq in MHz
+//Voltage in milli volts with 2 fractional bits
+typedef struct {
+  uint32_t DcfClocks[NUM_DCFCLK_DPM_LEVELS];
+  uint32_t DispClocks[NUM_DISPCLK_DPM_LEVELS];
+  uint32_t DppClocks[NUM_DPPCLK_DPM_LEVELS];
+  uint32_t SocClocks[NUM_SOCCLK_DPM_LEVELS];
+  uint32_t VClocks0[NUM_VCN_DPM_LEVELS];
+  uint32_t VClocks1[NUM_VCN_DPM_LEVELS];
+  uint32_t DClocks0[NUM_VCN_DPM_LEVELS];
+  uint32_t DClocks1[NUM_VCN_DPM_LEVELS];
+  uint32_t VPEClocks[NUM_VPE_DPM_LEVELS];
+  uint32_t FclkClocks_Freq[NUM_FCLK_DPM_LEVELS];
+  uint32_t FclkClocks_Voltage[NUM_FCLK_DPM_LEVELS];
+  uint32_t SocVoltage[NUM_SOC_VOLTAGE_LEVELS];
+  MemPstateTable_t MemPstateTable[NUM_MEM_PSTATE_LEVELS];
+
+  uint8_t  NumDcfClkLevelsEnabled;
+  uint8_t  NumDispClkLevelsEnabled; //Applies to both Dispclk and Dppclk
+  uint8_t  NumSocClkLevelsEnabled;
+  uint8_t  Vcn0ClkLevelsEnabled;     //Applies to both Vclk0 and Dclk0
+  uint8_t  Vcn1ClkLevelsEnabled;     //Applies to both Vclk1 and Dclk1
+  uint8_t  VpeClkLevelsEnabled;
+  uint8_t  NumMemPstatesEnabled;
+  uint8_t  NumFclkLevelsEnabled;
+  uint8_t  spare;
+
+  uint32_t MinGfxClk;
+  uint32_t MaxGfxClk;
+} DpmClocks_t_v14_0_1;
+
 typedef struct {
   uint16_t CoreFrequency[16];          //Target core frequency [MHz]
   uint16_t CorePower[16];              //CAC calculated core power [mW]
@@ -224,7 +255,7 @@ typedef enum {
 #define TABLE_CUSTOM_DPM            2 // Called by Driver
 #define TABLE_BIOS_GPIO_CONFIG      3 // Called by BIOS
 #define TABLE_DPMCLOCKS             4 // Called by Driver and VBIOS
-#define TABLE_SPARE0                5 // Unused
+#define TABLE_MOMENTARY_PM          5 // Called by Tools
 #define TABLE_MODERN_STDBY          6 // Called by Tools for Modern Standby Log
 #define TABLE_SMU_METRICS           7 // Called by Driver and SMF/PMF
 #define TABLE_COUNT                 8
index 356e0f57a426ffa051fb40611947d9b50355ad87..ddb62586008319ba7c95758e562ca4118ddb5f48 100644 (file)
@@ -42,7 +42,7 @@
 #define FEATURE_EDC_BIT                      7
 #define FEATURE_PLL_POWER_DOWN_BIT           8
 #define FEATURE_VDDOFF_BIT                   9
-#define FEATURE_VCN_DPM_BIT                 10
+#define FEATURE_VCN_DPM_BIT                 10   /* this is for both VCN0 and VCN1 */
 #define FEATURE_DS_MPM_BIT                  11
 #define FEATURE_FCLK_DPM_BIT                12
 #define FEATURE_SOCCLK_DPM_BIT              13
@@ -56,9 +56,9 @@
 #define FEATURE_DS_GFXCLK_BIT               21
 #define FEATURE_DS_SOCCLK_BIT               22
 #define FEATURE_DS_LCLK_BIT                 23
-#define FEATURE_LOW_POWER_DCNCLKS_BIT       24  // for all DISP clks
+#define FEATURE_LOW_POWER_DCNCLKS_BIT       24
 #define FEATURE_DS_SHUBCLK_BIT              25
-#define FEATURE_SPARE0_BIT                  26  //SPARE
+#define FEATURE_RESERVED0_BIT               26
 #define FEATURE_ZSTATES_BIT                 27
 #define FEATURE_IOMMUL2_PG_BIT              28
 #define FEATURE_DS_FCLK_BIT                 29
@@ -66,8 +66,8 @@
 #define FEATURE_DS_MP1CLK_BIT               31
 #define FEATURE_WHISPER_MODE_BIT            32
 #define FEATURE_SMU_LOW_POWER_BIT           33
-#define FEATURE_SMART_L3_RINSER_BIT         34
-#define FEATURE_SPARE1_BIT                  35  //SPARE
+#define FEATURE_RESERVED1_BIT               34  /* v14_0_0 SMART_L3_RINSER; v14_0_1 RESERVED1 */
+#define FEATURE_GFX_DEM_BIT                 35  /* v14_0_0 SPARE; v14_0_1 GFX_DEM */
 #define FEATURE_PSI_BIT                     36
 #define FEATURE_PROCHOT_BIT                 37
 #define FEATURE_CPUOFF_BIT                  38
 #define FEATURE_PERF_LIMIT_BIT              42
 #define FEATURE_CORE_DLDO_BIT               43
 #define FEATURE_DVO_BIT                     44
-#define FEATURE_DS_VCN_BIT                  45
+#define FEATURE_DS_VCN_BIT                  45  /* v14_0_1 this is for both VCN0 and VCN1 */
 #define FEATURE_CPPC_BIT                    46
 #define FEATURE_CPPC_PREFERRED_CORES        47
 #define FEATURE_DF_CSTATES_BIT              48
-#define FEATURE_SPARE2_BIT                  49  //SPARE
+#define FEATURE_FAST_PSTATE_CLDO_BIT        49  /* v14_0_0 SPARE */
 #define FEATURE_ATHUB_PG_BIT                50
 #define FEATURE_VDDOFF_ECO_BIT              51
 #define FEATURE_ZSTATES_ECO_BIT             52
@@ -93,8 +93,8 @@
 #define FEATURE_DS_IPUCLK_BIT               58
 #define FEATURE_DS_VPECLK_BIT               59
 #define FEATURE_VPE_DPM_BIT                 60
-#define FEATURE_SPARE_61                    61
-#define FEATURE_FP_DIDT                     62
+#define FEATURE_SMART_L3_RINSER_BIT         61  /* v14_0_0 SPARE*/
+#define FEATURE_PCC_BIT                     62  /* v14_0_0 FP_DIDT v14_0_1 PCC_BIT */
 #define NUM_FEATURES                        63
 
 // Firmware Header/Footer
@@ -151,6 +151,43 @@ typedef struct {
   // MP1_EXT_SCRATCH7 = RTOS Current Job
 } FwStatus_t;
 
+// Firmware status layout, v14_0_1 variant.
+// Each group of bitfields below is labeled with an MP1_EXT_SCRATCHn register
+// name, and the widths in every group add up to 32 bits.
+typedef struct {
+  // MP1_EXT_SCRATCH0
+  uint32_t DpmHandlerID         : 8;
+  uint32_t ActivityMonitorID    : 8;
+  uint32_t DpmTimerID           : 8;
+  uint32_t DpmHubID             : 4;
+  uint32_t DpmHubTask           : 4;
+  // MP1_EXT_SCRATCH1
+  uint32_t CclkSyncStatus       : 8;
+  uint32_t ZstateStatus         : 4;
+  uint32_t Cpu1VddOff           : 4;
+  uint32_t DstateFun            : 4;
+  uint32_t DstateDev            : 4;
+  uint32_t GfxOffStatus         : 2;
+  uint32_t Cpu0Off              : 2;
+  uint32_t Cpu1Off              : 2;
+  uint32_t Cpu0VddOff           : 2;
+  // MP1_EXT_SCRATCH2
+  uint32_t P2JobHandler         :32;
+  // MP1_EXT_SCRATCH3
+  uint32_t PostCode             :32;
+  // MP1_EXT_SCRATCH4
+  uint32_t MsgPortBusy          :15;
+  uint32_t RsmuPmiP1Pending     : 1;
+  uint32_t RsmuPmiP2PendingCnt  : 8;
+  uint32_t DfCstateExitPending  : 1;
+  uint32_t Pc6EntryPending      : 1;
+  uint32_t Pc6ExitPending       : 1;
+  uint32_t WarmResetPending     : 1;
+  uint32_t Mp0ClkPending        : 1;
+  uint32_t InWhisperMode        : 1;
+  uint32_t spare2               : 2;
+  // MP1_EXT_SCRATCH5
+  uint32_t IdleMask             :32;
+  // MP1_EXT_SCRATCH6 = RTOS threads' status
+  // MP1_EXT_SCRATCH7 = RTOS Current Job
+} FwStatus_t_v14_0_1;
 
 #pragma pack(pop)
 
index ca7ce4251482dbdf22b5ea39a5e6ca55e763896d..c4dc5881d8df0953054cf6972d88f212e0c6872c 100644 (file)
 #define PPSMC_MSG_SetHardMinSocclkByFreq        0x13 ///< Set hard min for SOC CLK
 #define PPSMC_MSG_SetSoftMinFclk                0x14 ///< Set hard min for FCLK
 #define PPSMC_MSG_SetSoftMinVcn0                0x15 ///< Set soft min for VCN0 clocks (VCLK0 and DCLK0)
-
 #define PPSMC_MSG_EnableGfxImu                  0x16 ///< Enable GFX IMU
-
-#define PPSMC_MSG_spare_0x17                    0x17
-#define PPSMC_MSG_spare_0x18                    0x18
+#define PPSMC_MSG_spare_0x17                    0x17 ///< Get GFX clock frequency
+#define PPSMC_MSG_spare_0x18                    0x18 ///< Get FCLK frequency
 #define PPSMC_MSG_AllowGfxOff                   0x19 ///< Inform PMFW of allowing GFXOFF entry
 #define PPSMC_MSG_DisallowGfxOff                0x1A ///< Inform PMFW of disallowing GFXOFF entry
 #define PPSMC_MSG_SetSoftMaxGfxClk              0x1B ///< Set soft max for GFX CLK
 #define PPSMC_MSG_SetHardMinGfxClk              0x1C ///< Set hard min for GFX CLK
-
 #define PPSMC_MSG_SetSoftMaxSocclkByFreq        0x1D ///< Set soft max for SOC CLK
 #define PPSMC_MSG_SetSoftMaxFclkByFreq          0x1E ///< Set soft max for FCLK
 #define PPSMC_MSG_SetSoftMaxVcn0                0x1F ///< Set soft max for VCN0 clocks (VCLK0 and DCLK0)
-#define PPSMC_MSG_spare_0x20                    0x20
+#define PPSMC_MSG_spare_0x20                    0x20 ///< Set power limit percentage
 #define PPSMC_MSG_PowerDownJpeg0                0x21 ///< Power down Jpeg of VCN0
 #define PPSMC_MSG_PowerUpJpeg0                  0x22 ///< Power up Jpeg of VCN0; VCN0 is power gated by default
-
 #define PPSMC_MSG_SetHardMinFclkByFreq          0x23 ///< Set hard min for FCLK
 #define PPSMC_MSG_SetSoftMinSocclkByFreq        0x24 ///< Set soft min for SOC CLK
 #define PPSMC_MSG_AllowZstates                  0x25 ///< Inform PMFM of allowing Zstate entry, i.e. no Miracast activity
@@ -99,8 +95,8 @@
 #define PPSMC_MSG_PowerUpIspByTile              0x2A ///< This message is used to power up ISP tiles and enable the ISP DPM
 #define PPSMC_MSG_SetHardMinIspiclkByFreq       0x2B ///< Set HardMin by frequency for ISPICLK
 #define PPSMC_MSG_SetHardMinIspxclkByFreq       0x2C ///< Set HardMin by frequency for ISPXCLK
-#define PPSMC_MSG_PowerDownUmsch                0x2D ///< Power down VCN.UMSCH (aka VSCH) scheduler
-#define PPSMC_MSG_PowerUpUmsch                  0x2E ///< Power up VCN.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerDownUmsch                0x2D ///< Power down VCN0.UMSCH (aka VSCH) scheduler
+#define PPSMC_MSG_PowerUpUmsch                  0x2E ///< Power up VCN0.UMSCH (aka VSCH) scheduler
 #define PPSMC_Message_IspStutterOn_MmhubPgDis   0x2F ///< ISP StutterOn mmHub PgDis
 #define PPSMC_Message_IspStutterOff_MmhubPgEn   0x30 ///< ISP StufferOff mmHub PgEn
 #define PPSMC_MSG_PowerUpVpe                    0x31 ///< Power up VPE
 #define PPSMC_MSG_DisableLSdma                  0x35 ///< Disable LSDMA
 #define PPSMC_MSG_SetSoftMaxVpe                 0x36 ///<
 #define PPSMC_MSG_SetSoftMinVpe                 0x37 ///<
-#define PPSMC_Message_Count                     0x38 ///< Total number of PPSMC messages
+#define PPSMC_MSG_AllocMALLCache                0x38 ///< Allocating MALL Cache
+#define PPSMC_MSG_ReleaseMALLCache              0x39 ///< Releasing MALL Cache
+#define PPSMC_Message_Count                     0x3A ///< Total number of PPSMC messages
 /** @}*/
 
 /**
index 3f7463c1c1a91948588ae8ece2fd6c4cbffb1406..4af1985ae44668edf74b40c4f26dbd1bcd83c376 100644 (file)
@@ -27,6 +27,7 @@
 
 #define SMU14_DRIVER_IF_VERSION_INV 0xFFFFFFFF
 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_0 0x7
+#define SMU14_DRIVER_IF_VERSION_SMU_V14_0_1 0x6
 #define SMU14_DRIVER_IF_VERSION_SMU_V14_0_2 0x1
 
 #define FEATURE_MASK(feature) (1ULL << feature)
index 9c03296f92cdd41c868406dfd861bf56a77c2e81..67117ced7c6ae65405fb3a5338743d31270e8cd3 100644 (file)
@@ -2751,7 +2751,13 @@ static int smu_v13_0_0_set_mp1_state(struct smu_context *smu,
 
        switch (mp1_state) {
        case PP_MP1_STATE_UNLOAD:
-               ret = smu_cmn_set_mp1_state(smu, mp1_state);
+               ret = smu_cmn_send_smc_msg_with_param(smu,
+                                                                                         SMU_MSG_PrepareMp1ForUnload,
+                                                                                         0x55, NULL);
+
+               if (!ret && smu->smu_baco.state == SMU_BACO_STATE_EXIT)
+                       ret = smu_v13_0_disable_pmfw_state(smu);
+
                break;
        default:
                /* Ignore others */
index bb98156b2fa1d5fff3d71bcea59b2b63f9265b9e..949131bd1ecb215c960b7aabb9ad690da715d90c 100644 (file)
@@ -226,8 +226,18 @@ static int smu_v13_0_4_system_features_control(struct smu_context *smu, bool en)
        struct amdgpu_device *adev = smu->adev;
        int ret = 0;
 
-       if (!en && !adev->in_s0ix)
+       if (!en && !adev->in_s0ix) {
+               /* Adds a GFX reset as workaround just before sending the
+                * MP1_UNLOAD message to prevent GC/RLC/PMFW from entering
+                * an invalid state.
+                */
+               ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GfxDeviceDriverReset,
+                                                     SMU_RESET_MODE_2, NULL);
+               if (ret)
+                       return ret;
+
                ret = smu_cmn_send_smc_msg(smu, SMU_MSG_PrepareMp1ForUnload, NULL);
+       }
 
        return ret;
 }
index 9e39f99154f94df84495dbce069e2651f2b7f104..07a65e005785d6d0fceddd2564d63e84d08e755e 100644 (file)
@@ -234,7 +234,7 @@ int smu_v14_0_check_fw_version(struct smu_context *smu)
                smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
                break;
        case IP_VERSION(14, 0, 1):
-               smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_0;
+               smu->smc_driver_if_version = SMU14_DRIVER_IF_VERSION_SMU_V14_0_1;
                break;
 
        default:
index d6de6d97286c6990e24c79b318f533168c967bd0..63399c00cc28ffaa88725068496f35625b9807cc 100644 (file)
@@ -161,7 +161,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
 
        SMU_TABLE_INIT(tables, SMU_TABLE_WATERMARKS, sizeof(Watermarks_t),
                PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
-       SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, sizeof(DpmClocks_t),
+       SMU_TABLE_INIT(tables, SMU_TABLE_DPMCLOCKS, max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)),
                PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
        SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(SmuMetrics_t),
                PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
@@ -171,7 +171,7 @@ static int smu_v14_0_0_init_smc_tables(struct smu_context *smu)
                goto err0_out;
        smu_table->metrics_time = 0;
 
-       smu_table->clocks_table = kzalloc(sizeof(DpmClocks_t), GFP_KERNEL);
+       smu_table->clocks_table = kzalloc(max(sizeof(DpmClocks_t), sizeof(DpmClocks_t_v14_0_1)), GFP_KERNEL);
        if (!smu_table->clocks_table)
                goto err1_out;
 
@@ -593,6 +593,60 @@ static int smu_v14_0_0_mode2_reset(struct smu_context *smu)
        return ret;
 }
 
+/*
+ * Read the frequency stored for @dpm_level of @clk_type from the v14_0_1
+ * clock table and store it in *@freq.
+ *
+ * Returns 0 on success. Returns -EINVAL when the clock table is missing,
+ * @clk_type is out of range or not handled here, or @dpm_level is not below
+ * the enabled-level count for that clock; *@freq is left untouched then.
+ */
+static int smu_v14_0_1_get_dpm_freq_by_index(struct smu_context *smu,
+                                               enum smu_clk_type clk_type,
+                                               uint32_t dpm_level,
+                                               uint32_t *freq)
+{
+       DpmClocks_t_v14_0_1 *table = smu->smu_table.clocks_table;
+
+       if (!table || clk_type >= SMU_CLK_COUNT)
+               return -EINVAL;
+
+       /* Every successful lookup returns from inside the switch. */
+       switch (clk_type) {
+       case SMU_SOCCLK:
+               if (dpm_level < table->NumSocClkLevelsEnabled) {
+                       *freq = table->SocClocks[dpm_level];
+                       return 0;
+               }
+               break;
+       case SMU_VCLK:
+               if (dpm_level < table->Vcn0ClkLevelsEnabled) {
+                       *freq = table->VClocks0[dpm_level];
+                       return 0;
+               }
+               break;
+       case SMU_DCLK:
+               if (dpm_level < table->Vcn0ClkLevelsEnabled) {
+                       *freq = table->DClocks0[dpm_level];
+                       return 0;
+               }
+               break;
+       case SMU_VCLK1:
+               if (dpm_level < table->Vcn1ClkLevelsEnabled) {
+                       *freq = table->VClocks1[dpm_level];
+                       return 0;
+               }
+               break;
+       case SMU_DCLK1:
+               if (dpm_level < table->Vcn1ClkLevelsEnabled) {
+                       *freq = table->DClocks1[dpm_level];
+                       return 0;
+               }
+               break;
+       case SMU_UCLK:
+       case SMU_MCLK:
+               if (dpm_level < table->NumMemPstatesEnabled) {
+                       *freq = table->MemPstateTable[dpm_level].MemClk;
+                       return 0;
+               }
+               break;
+       case SMU_FCLK:
+               if (dpm_level < table->NumFclkLevelsEnabled) {
+                       *freq = table->FclkClocks_Freq[dpm_level];
+                       return 0;
+               }
+               break;
+       default:
+               break;
+       }
+
+       /* Unhandled clock type or level index out of range. */
+       return -EINVAL;
+}
+
 static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
                                                enum smu_clk_type clk_type,
                                                uint32_t dpm_level,
@@ -637,6 +691,19 @@ static int smu_v14_0_0_get_dpm_freq_by_index(struct smu_context *smu,
        return 0;
 }
 
+/*
+ * Dispatch a DPM frequency lookup to the helper matching the MP1 IP version.
+ *
+ * The helper's return code is passed back to the caller, so an invalid
+ * clock type or level index (-EINVAL) is no longer reported as success with
+ * *@freq left unwritten. For any other IP version nothing is looked up and
+ * 0 is returned, as before.
+ */
+static int smu_v14_0_common_get_dpm_freq_by_index(struct smu_context *smu,
+                                               enum smu_clk_type clk_type,
+                                               uint32_t dpm_level,
+                                               uint32_t *freq)
+{
+       switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
+       case IP_VERSION(14, 0, 0):
+               return smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+       case IP_VERSION(14, 0, 1):
+               return smu_v14_0_1_get_dpm_freq_by_index(smu, clk_type, dpm_level, freq);
+       default:
+               return 0;
+       }
+}
+
 static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
                                                enum smu_clk_type clk_type)
 {
@@ -657,6 +724,8 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
                break;
        case SMU_VCLK:
        case SMU_DCLK:
+       case SMU_VCLK1:
+       case SMU_DCLK1:
                feature_id = SMU_FEATURE_VCN_DPM_BIT;
                break;
        default:
@@ -666,6 +735,126 @@ static bool smu_v14_0_0_clk_dpm_is_enabled(struct smu_context *smu,
        return smu_cmn_feature_is_enabled(smu, feature_id);
 }
 
+/*
+ * Report the lowest and highest frequency of @clk_type for the v14_0_1
+ * clock table. Either of @min and @max may be NULL to skip that bound.
+ *
+ * When DPM is not enabled for the clock, both bounds are the matching boot
+ * value divided by 100 (0 for clock types without a boot value). Otherwise
+ * the GFX clocks use MinGfxClk/MaxGfxClk from the table and every other
+ * clock is resolved through a DPM level index.
+ *
+ * Returns 0 on success, -EINVAL for a clock type not handled here, or the
+ * error returned by the level lookup.
+ */
+static int smu_v14_0_1_get_dpm_ultimate_freq(struct smu_context *smu,
+                                                       enum smu_clk_type clk_type,
+                                                       uint32_t *min,
+                                                       uint32_t *max)
+{
+       DpmClocks_t_v14_0_1 *table = smu->smu_table.clocks_table;
+       const bool is_gfx = (clk_type == SMU_GFXCLK || clk_type == SMU_SCLK);
+       uint32_t boot_clk;
+       uint32_t level;
+       int ret;
+
+       if (!smu_v14_0_0_clk_dpm_is_enabled(smu, clk_type)) {
+               switch (clk_type) {
+               case SMU_MCLK:
+               case SMU_UCLK:
+                       boot_clk = smu->smu_table.boot_values.uclk;
+                       break;
+               case SMU_FCLK:
+                       boot_clk = smu->smu_table.boot_values.fclk;
+                       break;
+               case SMU_GFXCLK:
+               case SMU_SCLK:
+                       boot_clk = smu->smu_table.boot_values.gfxclk;
+                       break;
+               case SMU_SOCCLK:
+                       boot_clk = smu->smu_table.boot_values.socclk;
+                       break;
+               case SMU_VCLK:
+               case SMU_VCLK1:
+                       boot_clk = smu->smu_table.boot_values.vclk;
+                       break;
+               case SMU_DCLK:
+               case SMU_DCLK1:
+                       boot_clk = smu->smu_table.boot_values.dclk;
+                       break;
+               default:
+                       boot_clk = 0;
+                       break;
+               }
+
+               /* clock in Mhz unit */
+               if (min)
+                       *min = boot_clk / 100;
+               if (max)
+                       *max = boot_clk / 100;
+
+               return 0;
+       }
+
+       if (max) {
+               if (is_gfx) {
+                       *max = table->MaxGfxClk;
+               } else {
+                       /* Memory and fabric clocks take their maximum from level 0. */
+                       switch (clk_type) {
+                       case SMU_MCLK:
+                       case SMU_UCLK:
+                       case SMU_FCLK:
+                               level = 0;
+                               break;
+                       case SMU_SOCCLK:
+                               level = table->NumSocClkLevelsEnabled - 1;
+                               break;
+                       case SMU_VCLK:
+                       case SMU_DCLK:
+                               level = table->Vcn0ClkLevelsEnabled - 1;
+                               break;
+                       case SMU_VCLK1:
+                       case SMU_DCLK1:
+                               level = table->Vcn1ClkLevelsEnabled - 1;
+                               break;
+                       default:
+                               return -EINVAL;
+                       }
+
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, level, max);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       if (min) {
+               if (is_gfx) {
+                       *min = table->MinGfxClk;
+               } else {
+                       /* Memory and fabric clocks take their minimum from the last enabled level. */
+                       switch (clk_type) {
+                       case SMU_MCLK:
+                       case SMU_UCLK:
+                               level = table->NumMemPstatesEnabled - 1;
+                               break;
+                       case SMU_FCLK:
+                               level = table->NumFclkLevelsEnabled - 1;
+                               break;
+                       case SMU_SOCCLK:
+                       case SMU_VCLK:
+                       case SMU_DCLK:
+                       case SMU_VCLK1:
+                       case SMU_DCLK1:
+                               level = 0;
+                               break;
+                       default:
+                               return -EINVAL;
+                       }
+
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, level, min);
+                       if (ret)
+                               return ret;
+               }
+       }
+
+       return 0;
+}
+
 static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
                                                        enum smu_clk_type clk_type,
                                                        uint32_t *min,
@@ -736,7 +925,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
                }
 
                if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
-                       ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, max_dpm_level, max);
                        if (ret)
                                goto failed;
                }
@@ -768,7 +957,7 @@ static int smu_v14_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
                }
 
                if (clk_type != SMU_GFXCLK && clk_type != SMU_SCLK) {
-                       ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, min_dpm_level, min);
                        if (ret)
                                goto failed;
                }
@@ -778,6 +967,19 @@ failed:
        return ret;
 }
 
+/*
+ * Dispatch the min/max frequency query to the helper matching the MP1 IP
+ * version.
+ *
+ * The helper's return code is passed back to the caller, so a failed query
+ * (for example -EINVAL for an unsupported clock type) is no longer reported
+ * as success with *@min / *@max possibly unwritten. For any other IP
+ * version nothing is queried and 0 is returned, as before.
+ */
+static int smu_v14_0_common_get_dpm_ultimate_freq(struct smu_context *smu,
+                                                       enum smu_clk_type clk_type,
+                                                       uint32_t *min,
+                                                       uint32_t *max)
+{
+       switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
+       case IP_VERSION(14, 0, 0):
+               return smu_v14_0_0_get_dpm_ultimate_freq(smu, clk_type, min, max);
+       case IP_VERSION(14, 0, 1):
+               return smu_v14_0_1_get_dpm_ultimate_freq(smu, clk_type, min, max);
+       default:
+               return 0;
+       }
+}
+
 static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
                                            enum smu_clk_type clk_type,
                                            uint32_t *value)
@@ -811,6 +1013,37 @@ static int smu_v14_0_0_get_current_clk_freq(struct smu_context *smu,
        return smu_v14_0_0_get_smu_metrics_data(smu, member_type, value);
 }
 
+/*
+ * Store in *@count the number of enabled DPM levels that the v14_0_1 clock
+ * table records for @clk_type.
+ *
+ * Clock types not listed below leave *@count untouched. Always returns 0.
+ */
+static int smu_v14_0_1_get_dpm_level_count(struct smu_context *smu,
+                                          enum smu_clk_type clk_type,
+                                          uint32_t *count)
+{
+       DpmClocks_t_v14_0_1 *table = smu->smu_table.clocks_table;
+       uint32_t levels;
+
+       switch (clk_type) {
+       case SMU_SOCCLK:
+               levels = table->NumSocClkLevelsEnabled;
+               break;
+       case SMU_VCLK:
+       case SMU_DCLK:
+               levels = table->Vcn0ClkLevelsEnabled;
+               break;
+       case SMU_VCLK1:
+       case SMU_DCLK1:
+               levels = table->Vcn1ClkLevelsEnabled;
+               break;
+       case SMU_MCLK:
+               levels = table->NumMemPstatesEnabled;
+               break;
+       case SMU_FCLK:
+               levels = table->NumFclkLevelsEnabled;
+               break;
+       default:
+               /* Unhandled clock: keep the caller's value and report success. */
+               return 0;
+       }
+
+       *count = levels;
+       return 0;
+}
+
 static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
                                           enum smu_clk_type clk_type,
                                           uint32_t *count)
@@ -840,6 +1073,18 @@ static int smu_v14_0_0_get_dpm_level_count(struct smu_context *smu,
        return 0;
 }
 
+/*
+ * Dispatch the DPM level-count query to the helper matching the MP1 IP
+ * version.
+ *
+ * The helper's return code is passed back rather than discarded, matching
+ * the callers' "if (ret)" checks. For any other IP version nothing is
+ * queried and 0 is returned, as before.
+ */
+static int smu_v14_0_common_get_dpm_level_count(struct smu_context *smu,
+                                          enum smu_clk_type clk_type,
+                                          uint32_t *count)
+{
+       switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
+       case IP_VERSION(14, 0, 0):
+               return smu_v14_0_0_get_dpm_level_count(smu, clk_type, count);
+       case IP_VERSION(14, 0, 1):
+               return smu_v14_0_1_get_dpm_level_count(smu, clk_type, count);
+       default:
+               return 0;
+       }
+}
+
 static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
                                        enum smu_clk_type clk_type, char *buf)
 {
@@ -866,18 +1111,20 @@ static int smu_v14_0_0_print_clk_levels(struct smu_context *smu,
        case SMU_SOCCLK:
        case SMU_VCLK:
        case SMU_DCLK:
+       case SMU_VCLK1:
+       case SMU_DCLK1:
        case SMU_MCLK:
        case SMU_FCLK:
                ret = smu_v14_0_0_get_current_clk_freq(smu, clk_type, &cur_value);
                if (ret)
                        break;
 
-               ret = smu_v14_0_0_get_dpm_level_count(smu, clk_type, &count);
+               ret = smu_v14_0_common_get_dpm_level_count(smu, clk_type, &count);
                if (ret)
                        break;
 
                for (i = 0; i < count; i++) {
-                       ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, i, &value);
+                       ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, i, &value);
                        if (ret)
                                break;
 
@@ -940,8 +1187,13 @@ static int smu_v14_0_0_set_soft_freq_limited_range(struct smu_context *smu,
                break;
        case SMU_VCLK:
        case SMU_DCLK:
-               msg_set_min = SMU_MSG_SetHardMinVcn;
-               msg_set_max = SMU_MSG_SetSoftMaxVcn;
+               msg_set_min = SMU_MSG_SetHardMinVcn0;
+               msg_set_max = SMU_MSG_SetSoftMaxVcn0;
+               break;
+       case SMU_VCLK1:
+       case SMU_DCLK1:
+               msg_set_min = SMU_MSG_SetHardMinVcn1;
+               msg_set_max = SMU_MSG_SetSoftMaxVcn1;
                break;
        default:
                return -EINVAL;
@@ -971,11 +1223,11 @@ static int smu_v14_0_0_force_clk_levels(struct smu_context *smu,
        case SMU_FCLK:
        case SMU_VCLK:
        case SMU_DCLK:
-               ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
+               ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_min_level, &min_freq);
                if (ret)
                        break;
 
-               ret = smu_v14_0_0_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
+               ret = smu_v14_0_common_get_dpm_freq_by_index(smu, clk_type, soft_max_level, &max_freq);
                if (ret)
                        break;
 
@@ -1000,25 +1252,25 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
 
        switch (level) {
        case AMD_DPM_FORCED_LEVEL_HIGH:
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, NULL, &sclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_max);
                sclk_min = sclk_max;
                fclk_min = fclk_max;
                socclk_min = socclk_max;
                break;
        case AMD_DPM_FORCED_LEVEL_LOW:
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, NULL);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, NULL);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, NULL);
                sclk_max = sclk_min;
                fclk_max = fclk_min;
                socclk_max = socclk_min;
                break;
        case AMD_DPM_FORCED_LEVEL_AUTO:
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
-               smu_v14_0_0_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SCLK, &sclk_min, &sclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_FCLK, &fclk_min, &fclk_max);
+               smu_v14_0_common_get_dpm_ultimate_freq(smu, SMU_SOCCLK, &socclk_min, &socclk_max);
                break;
        case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
        case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
@@ -1067,6 +1319,18 @@ static int smu_v14_0_0_set_performance_level(struct smu_context *smu,
        return ret;
 }
 
+/*
+ * Initialise the fine-grain GFX frequency fields of @smu from the v14.0.1
+ * clock table: the default hard-min/soft-max come from MinGfxClk/MaxGfxClk
+ * and the "actual" hard-min/soft-max are cleared to 0.
+ *
+ * Always returns 0.
+ */
+static int smu_v14_0_1_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+       DpmClocks_t_v14_0_1 *table = smu->smu_table.clocks_table;
+
+       /* Clear the "actual" pair first ... */
+       smu->gfx_actual_soft_max_freq = 0;
+       smu->gfx_actual_hard_min_freq = 0;
+
+       /* ... then take the defaults straight from the clock table. */
+       smu->gfx_default_soft_max_freq = table->MaxGfxClk;
+       smu->gfx_default_hard_min_freq = table->MinGfxClk;
+
+       return 0;
+}
+
 static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
 {
        DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1079,6 +1343,16 @@ static int smu_v14_0_0_set_fine_grain_gfx_freq_parameters(struct smu_context *sm
        return 0;
 }
 
+/*
+ * Select the fine-grain GFX frequency initialiser that matches the MP1 IP
+ * version returned by amdgpu_ip_version() and run it on @smu.
+ *
+ * Returns the status of the selected helper rather than discarding it. If
+ * the IP version is neither 14.0.0 nor 14.0.1, nothing is initialised and
+ * 0 is returned.
+ */
+static int smu_v14_0_common_set_fine_grain_gfx_freq_parameters(struct smu_context *smu)
+{
+       switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
+       case IP_VERSION(14, 0, 0):
+               return smu_v14_0_0_set_fine_grain_gfx_freq_parameters(smu);
+       case IP_VERSION(14, 0, 1):
+               return smu_v14_0_1_set_fine_grain_gfx_freq_parameters(smu);
+       default:
+               /* Unknown IP version: keep the historical "success" result. */
+               return 0;
+       }
+}
+
 static int smu_v14_0_0_set_vpe_enable(struct smu_context *smu,
                                      bool enable)
 {
@@ -1095,6 +1369,25 @@ static int smu_v14_0_0_set_umsch_mm_enable(struct smu_context *smu,
                                               0, NULL);
 }
 
+/*
+ * Fill the SOC and VPE entries of @clock_table from the v14.0.1 clock table
+ * held in @smu. Only these two clock groups are copied; they provide the
+ * VPE DPM settings.
+ *
+ * For each group, slots below the table's "levels enabled" count receive
+ * the table frequency and the remaining slots receive 0. Vol is set to 0
+ * for every slot.
+ *
+ * Always returns 0.
+ */
+static int smu_14_0_1_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+       DpmClocks_t_v14_0_1 *table = smu->smu_table.clocks_table;
+       uint8_t lvl;
+
+       for (lvl = 0; lvl < NUM_SOCCLK_DPM_LEVELS; lvl++) {
+               if (lvl < table->NumSocClkLevelsEnabled)
+                       clock_table->SocClocks[lvl].Freq = table->SocClocks[lvl];
+               else
+                       clock_table->SocClocks[lvl].Freq = 0;
+
+               clock_table->SocClocks[lvl].Vol = 0;
+       }
+
+       for (lvl = 0; lvl < NUM_VPE_DPM_LEVELS; lvl++) {
+               if (lvl < table->VpeClkLevelsEnabled)
+                       clock_table->VPEClocks[lvl].Freq = table->VPEClocks[lvl];
+               else
+                       clock_table->VPEClocks[lvl].Freq = 0;
+
+               clock_table->VPEClocks[lvl].Vol = 0;
+       }
+
+       return 0;
+}
+
 static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
 {
        DpmClocks_t *clk_table = smu->smu_table.clocks_table;
@@ -1114,6 +1407,16 @@ static int smu_14_0_0_get_dpm_table(struct smu_context *smu, struct dpm_clocks *
        return 0;
 }
 
+/*
+ * Fill @clock_table using the DPM-table helper that matches the MP1 IP
+ * version returned by amdgpu_ip_version().
+ *
+ * Returns the status of the selected helper rather than discarding it. If
+ * the IP version is neither 14.0.0 nor 14.0.1, @clock_table is not written
+ * and 0 is returned.
+ */
+static int smu_v14_0_common_get_dpm_table(struct smu_context *smu, struct dpm_clocks *clock_table)
+{
+       switch (amdgpu_ip_version(smu->adev, MP1_HWIP, 0)) {
+       case IP_VERSION(14, 0, 0):
+               return smu_14_0_0_get_dpm_table(smu, clock_table);
+       case IP_VERSION(14, 0, 1):
+               return smu_14_0_1_get_dpm_table(smu, clock_table);
+       default:
+               /* Unknown IP version: keep the historical "success" result. */
+               return 0;
+       }
+}
+
 static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
        .check_fw_status = smu_v14_0_check_fw_status,
        .check_fw_version = smu_v14_0_check_fw_version,
@@ -1135,16 +1438,16 @@ static const struct pptable_funcs smu_v14_0_0_ppt_funcs = {
        .set_driver_table_location = smu_v14_0_set_driver_table_location,
        .gfx_off_control = smu_v14_0_gfx_off_control,
        .mode2_reset = smu_v14_0_0_mode2_reset,
-       .get_dpm_ultimate_freq = smu_v14_0_0_get_dpm_ultimate_freq,
+       .get_dpm_ultimate_freq = smu_v14_0_common_get_dpm_ultimate_freq,
        .od_edit_dpm_table = smu_v14_0_od_edit_dpm_table,
        .print_clk_levels = smu_v14_0_0_print_clk_levels,
        .force_clk_levels = smu_v14_0_0_force_clk_levels,
        .set_performance_level = smu_v14_0_0_set_performance_level,
-       .set_fine_grain_gfx_freq_parameters = smu_v14_0_0_set_fine_grain_gfx_freq_parameters,
+       .set_fine_grain_gfx_freq_parameters = smu_v14_0_common_set_fine_grain_gfx_freq_parameters,
        .set_gfx_power_up_by_imu = smu_v14_0_set_gfx_power_up_by_imu,
        .dpm_set_vpe_enable = smu_v14_0_0_set_vpe_enable,
        .dpm_set_umsch_mm_enable = smu_v14_0_0_set_umsch_mm_enable,
-       .get_dpm_clock_table = smu_14_0_0_get_dpm_table,
+       .get_dpm_clock_table = smu_v14_0_common_get_dpm_table,
 };
 
 static void smu_v14_0_0_set_smu_mailbox_registers(struct smu_context *smu)
index ebb6d8ebd44eb6f70480b9655e6f253e41c77c04..1e9259416980ec49cce1b7fc080f562f002e29c5 100644 (file)
@@ -180,6 +180,7 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
 {
        struct ast_device *ast = to_ast_device(dev);
        u8 video_on_off = on;
+       u32 i = 0;
 
        // Video On/Off
        ast_set_index_reg_mask(ast, AST_IO_VGACRI, 0xE3, (u8) ~AST_DP_VIDEO_ENABLE, on);
@@ -192,6 +193,8 @@ void ast_dp_set_on_off(struct drm_device *dev, bool on)
                                                ASTDP_MIRROR_VIDEO_ENABLE) != video_on_off) {
                        // wait 1 ms
                        mdelay(1);
+                       if (++i > 200)
+                               break;
                }
        }
 }
index 871e4e2129d6daac8dadcb3262227451c59296c8..0683a129b36285cc96c25d57d3115cb111fc2003 100644 (file)
@@ -777,6 +777,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
        unsigned int total_modes_count = 0;
        struct drm_client_offset *offsets;
        unsigned int connector_count = 0;
+       /* points to modes protected by mode_config.mutex */
        struct drm_display_mode **modes;
        struct drm_crtc **crtcs;
        int i, ret = 0;
@@ -845,7 +846,6 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
                drm_client_pick_crtcs(client, connectors, connector_count,
                                      crtcs, modes, 0, width, height);
        }
-       mutex_unlock(&dev->mode_config.mutex);
 
        drm_client_modeset_release(client);
 
@@ -875,6 +875,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
                        modeset->y = offset->y;
                }
        }
+       mutex_unlock(&dev->mode_config.mutex);
 
        mutex_unlock(&client->modeset_mutex);
 out:
index ed89b86ea625aaa408064916b982ffa92f9ef4b5..f672bfd70d455156aed1a17c2fb4929c7771962f 100644 (file)
@@ -2534,7 +2534,8 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
                intel_atomic_get_old_cdclk_state(state);
        const struct intel_cdclk_state *new_cdclk_state =
                intel_atomic_get_new_cdclk_state(state);
-       enum pipe pipe = new_cdclk_state->pipe;
+       struct intel_cdclk_config cdclk_config;
+       enum pipe pipe;
 
        if (!intel_cdclk_changed(&old_cdclk_state->actual,
                                 &new_cdclk_state->actual))
@@ -2543,12 +2544,25 @@ intel_set_cdclk_pre_plane_update(struct intel_atomic_state *state)
        if (IS_DG2(i915))
                intel_cdclk_pcode_pre_notify(state);
 
-       if (pipe == INVALID_PIPE ||
-           old_cdclk_state->actual.cdclk <= new_cdclk_state->actual.cdclk) {
-               drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+       if (new_cdclk_state->disable_pipes) {
+               cdclk_config = new_cdclk_state->actual;
+               pipe = INVALID_PIPE;
+       } else {
+               if (new_cdclk_state->actual.cdclk >= old_cdclk_state->actual.cdclk) {
+                       cdclk_config = new_cdclk_state->actual;
+                       pipe = new_cdclk_state->pipe;
+               } else {
+                       cdclk_config = old_cdclk_state->actual;
+                       pipe = INVALID_PIPE;
+               }
 
-               intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
+               cdclk_config.voltage_level = max(new_cdclk_state->actual.voltage_level,
+                                                old_cdclk_state->actual.voltage_level);
        }
+
+       drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+
+       intel_set_cdclk(i915, &cdclk_config, pipe);
 }
 
 /**
@@ -2566,7 +2580,7 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
                intel_atomic_get_old_cdclk_state(state);
        const struct intel_cdclk_state *new_cdclk_state =
                intel_atomic_get_new_cdclk_state(state);
-       enum pipe pipe = new_cdclk_state->pipe;
+       enum pipe pipe;
 
        if (!intel_cdclk_changed(&old_cdclk_state->actual,
                                 &new_cdclk_state->actual))
@@ -2575,12 +2589,15 @@ intel_set_cdclk_post_plane_update(struct intel_atomic_state *state)
        if (IS_DG2(i915))
                intel_cdclk_pcode_post_notify(state);
 
-       if (pipe != INVALID_PIPE &&
-           old_cdclk_state->actual.cdclk > new_cdclk_state->actual.cdclk) {
-               drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
+       if (!new_cdclk_state->disable_pipes &&
+           new_cdclk_state->actual.cdclk < old_cdclk_state->actual.cdclk)
+               pipe = new_cdclk_state->pipe;
+       else
+               pipe = INVALID_PIPE;
+
+       drm_WARN_ON(&i915->drm, !new_cdclk_state->base.changed);
 
-               intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
-       }
+       intel_set_cdclk(i915, &new_cdclk_state->actual, pipe);
 }
 
 static int intel_pixel_rate_to_cdclk(const struct intel_crtc_state *crtc_state)
@@ -3058,6 +3075,7 @@ static struct intel_global_state *intel_cdclk_duplicate_state(struct intel_globa
                return NULL;
 
        cdclk_state->pipe = INVALID_PIPE;
+       cdclk_state->disable_pipes = false;
 
        return &cdclk_state->base;
 }
@@ -3236,6 +3254,8 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state)
                if (ret)
                        return ret;
 
+               new_cdclk_state->disable_pipes = true;
+
                drm_dbg_kms(&dev_priv->drm,
                            "Modeset required for cdclk change\n");
        }
index 48fd7d39e0cd9c4f6d57970f35531b91aa6ab055..71bc032bfef16efd359757373f256dfc88bd86fc 100644 (file)
@@ -51,6 +51,9 @@ struct intel_cdclk_state {
 
        /* bitmask of active pipes */
        u8 active_pipes;
+
+       /* update cdclk with pipes disabled */
+       bool disable_pipes;
 };
 
 int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state);
index c587a8efeafcf5e561429d925c2893208908e03f..c17462b4c2ac1930a085eff2256f8642b9ce8830 100644 (file)
@@ -4256,7 +4256,12 @@ static bool m_n_equal(const struct intel_link_m_n *m_n_1,
 static bool crtcs_port_sync_compatible(const struct intel_crtc_state *crtc_state1,
                                       const struct intel_crtc_state *crtc_state2)
 {
+       /*
+        * FIXME the modeset sequence is currently wrong and
+        * can't deal with bigjoiner + port sync at the same time.
+        */
        return crtc_state1->hw.active && crtc_state2->hw.active &&
+               !crtc_state1->bigjoiner_pipes && !crtc_state2->bigjoiner_pipes &&
                crtc_state1->output_types == crtc_state2->output_types &&
                crtc_state1->output_format == crtc_state2->output_format &&
                crtc_state1->lane_count == crtc_state2->lane_count &&
index abd62bebc46d0e58d5bc78d8f4500ddcbc6098f1..e583515f9b25a33da4825d10cf42a9f73fa17990 100644 (file)
@@ -2725,7 +2725,11 @@ intel_dp_drrs_compute_config(struct intel_connector *connector,
                intel_panel_downclock_mode(connector, &pipe_config->hw.adjusted_mode);
        int pixel_clock;
 
-       if (has_seamless_m_n(connector))
+       /*
+        * FIXME all joined pipes share the same transcoder.
+        * Need to account for that when updating M/N live.
+        */
+       if (has_seamless_m_n(connector) && !pipe_config->bigjoiner_pipes)
                pipe_config->update_m_n = true;
 
        if (!can_enable_drrs(connector, pipe_config, downclock_mode)) {
index b98a87883fefb016be68ceb72a408258868b55ec..9db43bd81ce2fabe51963e129f135d3e8dd71fa7 100644 (file)
@@ -691,12 +691,15 @@ int intel_dp_hdcp_get_remote_capability(struct intel_connector *connector,
        u8 bcaps;
        int ret;
 
+       *hdcp_capable = false;
+       *hdcp2_capable = false;
        if (!intel_encoder_is_mst(connector->encoder))
                return -EINVAL;
 
        ret =  _intel_dp_hdcp2_get_capability(aux, hdcp2_capable);
        if (ret)
-               return ret;
+               drm_dbg_kms(&i915->drm,
+                           "HDCP2 DPCD capability read failed err: %d\n", ret);
 
        ret = intel_dp_hdcp_read_bcaps(aux, i915, &bcaps);
        if (ret)
index b6e539f1342c29ad97f5f46de8b51d9a358375bb..aabd018bd73743ff354353506b2ce007268a88c5 100644 (file)
@@ -1422,6 +1422,17 @@ void intel_psr_compute_config(struct intel_dp *intel_dp,
                return;
        }
 
+       /*
+        * FIXME figure out what is wrong with PSR+bigjoiner and
+        * fix it. Presumably something related to the fact that
+        * PSR is a transcoder level feature.
+        */
+       if (crtc_state->bigjoiner_pipes) {
+               drm_dbg_kms(&dev_priv->drm,
+                           "PSR disabled due to bigjoiner\n");
+               return;
+       }
+
        if (CAN_PANEL_REPLAY(intel_dp))
                crtc_state->has_panel_replay = true;
        else
index eb5bd0743902065d9b4bcac060d1fd340d448069..f542ee1db1d97047eedfffff76c04cbbaf3435ea 100644 (file)
@@ -117,6 +117,13 @@ intel_vrr_compute_config(struct intel_crtc_state *crtc_state,
        const struct drm_display_info *info = &connector->base.display_info;
        int vmin, vmax;
 
+       /*
+        * FIXME all joined pipes share the same transcoder.
+        * Need to account for that during VRR toggle/push/etc.
+        */
+       if (crtc_state->bigjoiner_pipes)
+               return;
+
        if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
                return;
 
index f3dcae4b9d455ed37d3cc3fd1635760cd9e264af..0f83c6d4376ffba646279586479f1710161d6633 100644 (file)
@@ -1403,14 +1403,17 @@ static void guc_cancel_busyness_worker(struct intel_guc *guc)
         * Trying to pass a 'need_sync' or 'in_reset' flag all the way down through
         * every possible call stack is unfeasible. It would be too intrusive to many
         * areas that really don't care about the GuC backend. However, there is the
-        * 'reset_in_progress' flag available, so just use that.
+        * I915_RESET_BACKOFF flag and the gt->reset.mutex can be tested for is_locked.
+        * So just use those. Note that testing both is required due to the hideously
+        * complex nature of the i915 driver's reset code paths.
         *
         * And note that in the case of a reset occurring during driver unload
-        * (wedge_on_fini), skipping the cancel in _prepare (when the reset flag is set
-        * is fine because there is another cancel in _finish (when the reset flag is
-        * not).
+        * (wedged_on_fini), skipping the cancel in reset_prepare/reset_fini (when the
+        * reset flag/mutex are set) is fine because there is another explicit cancel in
+        * intel_guc_submission_fini (when the reset flag/mutex are not).
         */
-       if (guc_to_gt(guc)->uc.reset_in_progress)
+       if (mutex_is_locked(&guc_to_gt(guc)->reset.mutex) ||
+           test_bit(I915_RESET_BACKOFF, &guc_to_gt(guc)->reset.flags))
                cancel_delayed_work(&guc->timestamp.work);
        else
                cancel_delayed_work_sync(&guc->timestamp.work);
@@ -1424,8 +1427,6 @@ static void __reset_guc_busyness_stats(struct intel_guc *guc)
        unsigned long flags;
        ktime_t unused;
 
-       guc_cancel_busyness_worker(guc);
-
        spin_lock_irqsave(&guc->timestamp.lock, flags);
 
        guc_update_pm_timestamp(guc, &unused);
@@ -2004,13 +2005,6 @@ void intel_guc_submission_cancel_requests(struct intel_guc *guc)
 
 void intel_guc_submission_reset_finish(struct intel_guc *guc)
 {
-       /*
-        * Ensure the busyness worker gets cancelled even on a fatal wedge.
-        * Note that reset_prepare is not allowed to because it confuses lockdep.
-        */
-       if (guc_submission_initialized(guc))
-               guc_cancel_busyness_worker(guc);
-
        /* Reset called during driver load or during wedge? */
        if (unlikely(!guc_submission_initialized(guc) ||
                     !intel_guc_is_fw_running(guc) ||
@@ -2136,6 +2130,7 @@ void intel_guc_submission_fini(struct intel_guc *guc)
        if (!guc->submission_initialized)
                return;
 
+       guc_fini_engine_stats(guc);
        guc_flush_destroyed_contexts(guc);
        guc_lrc_desc_pool_destroy_v69(guc);
        i915_sched_engine_put(guc->sched_engine);
index 6dfe5d9456c69e06987be23367c243bb1f8f908e..399bc319180b042cdcf78e2415b16ef52d980c61 100644 (file)
@@ -637,6 +637,10 @@ void intel_uc_reset_finish(struct intel_uc *uc)
 {
        struct intel_guc *guc = &uc->guc;
 
+       /*
+        * NB: The wedge code path results in prepare -> prepare -> finish -> finish.
+        * So this function is sometimes called with the in-progress flag not set.
+        */
        uc->reset_in_progress = false;
 
        /* Firmware expected to be running when this function is called */
index 0674aca0f8a3f593bad4dbe929be4260f5a6219a..cf0b1de1c07124d2fe45d2f7f220f5cebed71227 100644 (file)
@@ -1377,6 +1377,10 @@ static void a6xx_calc_ubwc_config(struct adreno_gpu *gpu)
        if (adreno_is_a618(gpu))
                gpu->ubwc_config.highest_bank_bit = 14;
 
+       if (adreno_is_a619(gpu))
+               /* TODO: Should be 14 but causes corruption at e.g. 1920x1200 on DP */
+               gpu->ubwc_config.highest_bank_bit = 13;
+
        if (adreno_is_a619_holi(gpu))
                gpu->ubwc_config.highest_bank_bit = 13;
 
index 1f5245fc2cdc6ca6ffd109fa6844eda84f79cd32..a847a0f7a73c9f61fde92fcf75f36a4f37dadf07 100644 (file)
@@ -852,7 +852,7 @@ static void a6xx_get_shader_block(struct msm_gpu *gpu,
                        (block->type << 8) | i);
 
                in += CRASHDUMP_READ(in, REG_A6XX_HLSQ_DBG_AHB_READ_APERTURE,
-                       block->size, dumper->iova + A6XX_CD_DATA_OFFSET);
+                       block->size, out);
 
                out += block->size * sizeof(u32);
        }
index 9a9f7092c526a630c8cb8099e7ae0921d6b1d3a1..a3e60ac70689e7f8af8813d978626cd7d4c9fb3e 100644 (file)
@@ -324,6 +324,7 @@ static const struct dpu_wb_cfg x1e80100_wb[] = {
        },
 };
 
+/* TODO: INTF 3, 8 and 7 are used for MST, marked as INTF_NONE for now */
 static const struct dpu_intf_cfg x1e80100_intf[] = {
        {
                .name = "intf_0", .id = INTF_0,
@@ -358,8 +359,8 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
                .name = "intf_3", .id = INTF_3,
                .base = 0x37000, .len = 0x280,
                .features = INTF_SC7280_MASK,
-               .type = INTF_DP,
-               .controller_id = MSM_DP_CONTROLLER_1,
+               .type = INTF_NONE,
+               .controller_id = MSM_DP_CONTROLLER_0,   /* pair with intf_0 for DP MST */
                .prog_fetch_lines_worst_case = 24,
                .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 30),
                .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 31),
@@ -368,7 +369,7 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
                .base = 0x38000, .len = 0x280,
                .features = INTF_SC7280_MASK,
                .type = INTF_DP,
-               .controller_id = MSM_DP_CONTROLLER_2,
+               .controller_id = MSM_DP_CONTROLLER_1,
                .prog_fetch_lines_worst_case = 24,
                .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 20),
                .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 21),
@@ -381,6 +382,33 @@ static const struct dpu_intf_cfg x1e80100_intf[] = {
                .prog_fetch_lines_worst_case = 24,
                .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 22),
                .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 23),
+       }, {
+               .name = "intf_6", .id = INTF_6,
+               .base = 0x3A000, .len = 0x280,
+               .features = INTF_SC7280_MASK,
+               .type = INTF_DP,
+               .controller_id = MSM_DP_CONTROLLER_2,
+               .prog_fetch_lines_worst_case = 24,
+               .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 17),
+               .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 16),
+       }, {
+               .name = "intf_7", .id = INTF_7,
+               .base = 0x3b000, .len = 0x280,
+               .features = INTF_SC7280_MASK,
+               .type = INTF_NONE,
+               .controller_id = MSM_DP_CONTROLLER_2,   /* pair with intf_6 for DP MST */
+               .prog_fetch_lines_worst_case = 24,
+               .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 18),
+               .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 19),
+       }, {
+               .name = "intf_8", .id = INTF_8,
+               .base = 0x3c000, .len = 0x280,
+               .features = INTF_SC7280_MASK,
+               .type = INTF_NONE,
+               .controller_id = MSM_DP_CONTROLLER_1,   /* pair with intf_4 for DP MST */
+               .prog_fetch_lines_worst_case = 24,
+               .intr_underrun = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 12),
+               .intr_vsync = DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 13),
        },
 };
 
index ef871239adb2a37e11c6d364d85f7384403459ee..68fae048a9a837410eb6051f9af52a6e0c399585 100644 (file)
@@ -459,15 +459,15 @@ int dpu_core_perf_debugfs_init(struct dpu_kms *dpu_kms, struct dentry *parent)
                        &perf->core_clk_rate);
        debugfs_create_u32("enable_bw_release", 0600, entry,
                        (u32 *)&perf->enable_bw_release);
-       debugfs_create_u32("threshold_low", 0600, entry,
+       debugfs_create_u32("threshold_low", 0400, entry,
                        (u32 *)&perf->perf_cfg->max_bw_low);
-       debugfs_create_u32("threshold_high", 0600, entry,
+       debugfs_create_u32("threshold_high", 0400, entry,
                        (u32 *)&perf->perf_cfg->max_bw_high);
-       debugfs_create_u32("min_core_ib", 0600, entry,
+       debugfs_create_u32("min_core_ib", 0400, entry,
                        (u32 *)&perf->perf_cfg->min_core_ib);
-       debugfs_create_u32("min_llcc_ib", 0600, entry,
+       debugfs_create_u32("min_llcc_ib", 0400, entry,
                        (u32 *)&perf->perf_cfg->min_llcc_ib);
-       debugfs_create_u32("min_dram_ib", 0600, entry,
+       debugfs_create_u32("min_dram_ib", 0400, entry,
                        (u32 *)&perf->perf_cfg->min_dram_ib);
        debugfs_create_file("perf_mode", 0600, entry,
                        (u32 *)perf, &dpu_core_perf_mode_fops);
index 946dd0135dffcf7dcd2b7f6445c62c048a044e8d..6a0a74832fb64d95adc6b0524ba15bd1faaa0bb1 100644 (file)
@@ -525,14 +525,14 @@ int dpu_core_irq_register_callback(struct dpu_kms *dpu_kms,
        int ret;
 
        if (!irq_cb) {
-               DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
-                         DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
+               DPU_ERROR("IRQ=[%d, %d] NULL callback\n",
+                         DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
                return -EINVAL;
        }
 
        if (!dpu_core_irq_is_valid(irq_idx)) {
-               DPU_ERROR("invalid IRQ=[%d, %d]\n",
-                         DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx));
+               DPU_ERROR("invalid IRQ=[%d, %d] irq_cb:%ps\n",
+                         DPU_IRQ_REG(irq_idx), DPU_IRQ_BIT(irq_idx), irq_cb);
                return -EINVAL;
        }
 
index c4cb82af5c2f2f77ae7c9804f4fd6a12c42d42c0..ffbfde9225898619c11b6fd3d59062ed1a65b719 100644 (file)
@@ -484,7 +484,7 @@ static void dp_display_handle_video_request(struct dp_display_private *dp)
        }
 }
 
-static int dp_display_handle_port_ststus_changed(struct dp_display_private *dp)
+static int dp_display_handle_port_status_changed(struct dp_display_private *dp)
 {
        int rc = 0;
 
@@ -541,7 +541,7 @@ static int dp_display_usbpd_attention_cb(struct device *dev)
                drm_dbg_dp(dp->drm_dev, "hpd_state=%d sink_request=%d\n",
                                        dp->hpd_state, sink_request);
                if (sink_request & DS_PORT_STATUS_CHANGED)
-                       rc = dp_display_handle_port_ststus_changed(dp);
+                       rc = dp_display_handle_port_status_changed(dp);
                else
                        rc = dp_display_handle_irq_hpd(dp);
        }
@@ -588,6 +588,7 @@ static int dp_hpd_plug_handle(struct dp_display_private *dp, u32 data)
        ret = dp_display_usbpd_configure_cb(&pdev->dev);
        if (ret) {      /* link train failed */
                dp->hpd_state = ST_DISCONNECTED;
+               pm_runtime_put_sync(&pdev->dev);
        } else {
                dp->hpd_state = ST_MAINLINK_READY;
        }
@@ -645,6 +646,7 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
                dp_display_host_phy_exit(dp);
                dp->hpd_state = ST_DISCONNECTED;
                dp_display_notify_disconnect(&dp->dp_display.pdev->dev);
+               pm_runtime_put_sync(&pdev->dev);
                mutex_unlock(&dp->event_mutex);
                return 0;
        }
index e3f61c39df69b4c31ffae28ea7f2ecab500f8863..80166f702a0dbab3a36a489c3c853e35533b4fe2 100644 (file)
@@ -89,7 +89,7 @@ int msm_framebuffer_prepare(struct drm_framebuffer *fb,
 
        for (i = 0; i < n; i++) {
                ret = msm_gem_get_and_pin_iova(fb->obj[i], aspace, &msm_fb->iova[i]);
-               drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)",
+               drm_dbg_state(fb->dev, "FB[%u]: iova[%d]: %08llx (%d)\n",
                              fb->base.id, i, msm_fb->iova[i], ret);
                if (ret)
                        return ret;
@@ -176,7 +176,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
        const struct msm_format *format;
        int ret, i, n;
 
-       drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)",
+       drm_dbg_state(dev, "create framebuffer: mode_cmd=%p (%dx%d@%4.4s)\n",
                        mode_cmd, mode_cmd->width, mode_cmd->height,
                        (char *)&mode_cmd->pixel_format);
 
@@ -232,7 +232,7 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev,
 
        refcount_set(&msm_fb->dirtyfb, 1);
 
-       drm_dbg_state(dev, "create: FB ID: %d (%p)", fb->base.id, fb);
+       drm_dbg_state(dev, "create: FB ID: %d (%p)\n", fb->base.id, fb);
 
        return fb;
 
index 84c21ec2ceeae08d8506688f73acf530ef40012b..af6a6fcb11736f6dc7637805647b9c717e684a09 100644 (file)
@@ -149,7 +149,7 @@ int msm_crtc_enable_vblank(struct drm_crtc *crtc)
        struct msm_kms *kms = priv->kms;
        if (!kms)
                return -ENXIO;
-       drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+       drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
        return vblank_ctrl_queue_work(priv, crtc, true);
 }
 
@@ -160,7 +160,7 @@ void msm_crtc_disable_vblank(struct drm_crtc *crtc)
        struct msm_kms *kms = priv->kms;
        if (!kms)
                return;
-       drm_dbg_vbl(dev, "crtc=%u", crtc->base.id);
+       drm_dbg_vbl(dev, "crtc=%u\n", crtc->base.id);
        vblank_ctrl_queue_work(priv, crtc, false);
 }
 
index 479effcf607e261fac73361958a0a855cf90d315..79cfab53f80e259093b7ae0f04310f6470a3c930 100644 (file)
@@ -23,6 +23,7 @@
  */
 
 #include "nouveau_drv.h"
+#include "nouveau_bios.h"
 #include "nouveau_reg.h"
 #include "dispnv04/hw.h"
 #include "nouveau_encoder.h"
@@ -1677,7 +1678,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf)
         */
        if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) {
                if (*conn == 0xf2005014 && *conf == 0xffffffff) {
-                       fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1);
+                       fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B);
                        return false;
                }
        }
@@ -1763,26 +1764,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios)
 #ifdef __powerpc__
        /* Apple iMac G4 NV17 */
        if (of_machine_is_compatible("PowerMac4,5")) {
-               fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1);
-               fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2);
+               fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B);
+               fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C);
                return;
        }
 #endif
 
        /* Make up some sane defaults */
        fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG,
-                            bios->legacy.i2c_indices.crt, 1, 1);
+                            bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B);
 
        if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0)
                fabricate_dcb_output(dcb, DCB_OUTPUT_TV,
                                     bios->legacy.i2c_indices.tv,
-                                    all_heads, 0);
+                                    all_heads, DCB_OUTPUT_A);
 
        else if (bios->tmds.output0_script_ptr ||
                 bios->tmds.output1_script_ptr)
                fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS,
                                     bios->legacy.i2c_indices.panel,
-                                    all_heads, 1);
+                                    all_heads, DCB_OUTPUT_B);
 }
 
 static int
index 7de7707ec6a895ee2a914150008425a21041bf9c..a72c45809484ab58023dfa0dc5172f67adcfdc23 100644 (file)
@@ -225,12 +225,18 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector,
        u8 *dpcd = nv_encoder->dp.dpcd;
        int ret = NOUVEAU_DP_NONE, hpd;
 
-       /* If we've already read the DPCD on an eDP device, we don't need to
-        * reread it as it won't change
+       /* eDP ports don't support hotplugging - so there's no point in probing eDP ports unless we
+        * haven't probed them once before.
         */
-       if (connector->connector_type == DRM_MODE_CONNECTOR_eDP &&
-           dpcd[DP_DPCD_REV] != 0)
-               return NOUVEAU_DP_SST;
+       if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+               if (connector->status == connector_status_connected)
+                       return NOUVEAU_DP_SST;
+               else if (connector->status == connector_status_disconnected)
+                       return NOUVEAU_DP_NONE;
+       }
+
+       // Ensure that the aux bus is enabled for probing
+       drm_dp_dpcd_set_powered(&nv_connector->aux, true);
 
        mutex_lock(&nv_encoder->dp.hpd_irq_lock);
        if (mstm) {
@@ -293,6 +299,13 @@ out:
        if (mstm && !mstm->suspended && ret != NOUVEAU_DP_MST)
                nv50_mstm_remove(mstm);
 
+       /* GSP doesn't like when we try to do aux transactions on a port it considers disconnected,
+        * and since we don't really have a usecase for that anyway - just disable the aux bus here
+        * if we've decided the connector is disconnected
+        */
+       if (ret == NOUVEAU_DP_NONE)
+               drm_dp_dpcd_set_powered(&nv_connector->aux, false);
+
        mutex_unlock(&nv_encoder->dp.hpd_irq_lock);
        return ret;
 }
index 4bf486b57101367708bba2b6fe4bdd1d985f1d19..cb05f7f48a98bb53fc3e03b57166466c675acd7c 100644 (file)
@@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name)
        return ERR_PTR(-EINVAL);
 }
 
+static void of_fini(void *p)
+{
+       kfree(p);
+}
+
 const struct nvbios_source
 nvbios_of = {
        .name = "OpenFirmware",
        .init = of_init,
-       .fini = (void(*)(void *))kfree,
+       .fini = of_fini,
        .read = of_read,
        .size = of_size,
        .rw = false,
index 7bcbc4895ec22196acecfd46d0b29490d2c93ee2..271bfa038f5bc90974acd1ed2709d5cbae51ed94 100644 (file)
@@ -25,6 +25,7 @@
 
 #include <subdev/bios.h>
 #include <subdev/bios/init.h>
+#include <subdev/gsp.h>
 
 void
 gm107_devinit_disable(struct nvkm_devinit *init)
@@ -33,10 +34,13 @@ gm107_devinit_disable(struct nvkm_devinit *init)
        u32 r021c00 = nvkm_rd32(device, 0x021c00);
        u32 r021c04 = nvkm_rd32(device, 0x021c04);
 
-       if (r021c00 & 0x00000001)
-               nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
-       if (r021c00 & 0x00000004)
-               nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+       /* gsp only wants to enable/disable display */
+       if (!nvkm_gsp_rm(device->gsp)) {
+               if (r021c00 & 0x00000001)
+                       nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0);
+               if (r021c00 & 0x00000004)
+                       nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2);
+       }
        if (r021c04 & 0x00000001)
                nvkm_subdev_disable(device, NVKM_ENGINE_DISP, 0);
 }
index 11b4c9c274a1a597cb3592019d873345c241d1cd..666eb93b1742ca5435cf0567e28e1664122bad8b 100644 (file)
@@ -41,6 +41,7 @@ r535_devinit_new(const struct nvkm_devinit_func *hw,
 
        rm->dtor = r535_devinit_dtor;
        rm->post = hw->post;
+       rm->disable = hw->disable;
 
        ret = nv50_devinit_new_(rm, device, type, inst, pdevinit);
        if (ret)
index 9994cbd6f1c40c0c798498687f4f5d7168e883c5..9858c1438aa7feda7d84ff5442f611b23f101b2d 100644 (file)
@@ -1112,7 +1112,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
        rpc->numEntries = NV_GSP_REG_NUM_ENTRIES;
 
        str_offset = offsetof(typeof(*rpc), entries[NV_GSP_REG_NUM_ENTRIES]);
-       strings = (char *)&rpc->entries[NV_GSP_REG_NUM_ENTRIES];
+       strings = (char *)rpc + str_offset;
        for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) {
                int name_len = strlen(r535_registry_entries[i].name) + 1;
 
index a7f3fc342d87e03b031b5008d939c2eb46f49404..dd5b5a17ece0beed225888888d6c01a0afcf67c9 100644 (file)
@@ -222,8 +222,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
        void __iomem *map = NULL;
 
        /* Already mapped? */
-       if (refcount_inc_not_zero(&iobj->maps))
+       if (refcount_inc_not_zero(&iobj->maps)) {
+               /* read barrier match the wmb on refcount set */
+               smp_rmb();
                return iobj->map;
+       }
 
        /* Take the lock, and re-check that another thread hasn't
         * already mapped the object in the meantime.
@@ -250,6 +253,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
                        iobj->base.memory.ptrs = &nv50_instobj_fast;
                else
                        iobj->base.memory.ptrs = &nv50_instobj_slow;
+               /* barrier to ensure the ptrs are written before refcount is set */
+               smp_wmb();
                refcount_set(&iobj->maps, 1);
        }
 
index cb7406d7446695ebd3566230f3e11fca3b4cc323..c39fe0fc5d69c646915561bc3d4cb5cfc5411ac1 100644 (file)
@@ -614,8 +614,6 @@ static void nt36672e_panel_remove(struct mipi_dsi_device *dsi)
        struct nt36672e_panel *ctx = mipi_dsi_get_drvdata(dsi);
 
        mipi_dsi_detach(ctx->dsi);
-       mipi_dsi_device_unregister(ctx->dsi);
-
        drm_panel_remove(&ctx->panel);
 }
 
index 775144695283f54dcb1c527e58c9604cfd6da207..b15ca56a09a74a06f8bfcd0b4053d554ced9b58d 100644 (file)
@@ -253,8 +253,6 @@ static void visionox_rm69299_remove(struct mipi_dsi_device *dsi)
        struct visionox_rm69299 *ctx = mipi_dsi_get_drvdata(dsi);
 
        mipi_dsi_detach(ctx->dsi);
-       mipi_dsi_device_unregister(ctx->dsi);
-
        drm_panel_remove(&ctx->panel);
 }
 
index f38385fe76bbb45d92bf75cf078faec1f8be52ff..b91019cd5acb191a560b7217ff792cf4222004fa 100644 (file)
@@ -502,11 +502,18 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
        mapping_set_unevictable(mapping);
 
        for (i = page_offset; i < page_offset + NUM_FAULT_PAGES; i++) {
+               /* Can happen if the last fault only partially filled this
+                * section of the pages array before failing. In that case
+                * we skip already filled pages.
+                */
+               if (pages[i])
+                       continue;
+
                pages[i] = shmem_read_mapping_page(mapping, i);
                if (IS_ERR(pages[i])) {
                        ret = PTR_ERR(pages[i]);
                        pages[i] = NULL;
-                       goto err_pages;
+                       goto err_unlock;
                }
        }
 
@@ -514,7 +521,7 @@ static int panfrost_mmu_map_fault_addr(struct panfrost_device *pfdev, int as,
        ret = sg_alloc_table_from_pages(sgt, pages + page_offset,
                                        NUM_FAULT_PAGES, 0, SZ_2M, GFP_KERNEL);
        if (ret)
-               goto err_pages;
+               goto err_unlock;
 
        ret = dma_map_sgtable(pfdev->dev, sgt, DMA_BIDIRECTIONAL, 0);
        if (ret)
@@ -537,8 +544,6 @@ out:
 
 err_map:
        sg_free_table(sgt);
-err_pages:
-       drm_gem_shmem_put_pages(&bo->base);
 err_unlock:
        dma_resv_unlock(obj->resv);
 err_bo:
index 368d26da0d6a233467cdc8ef5820ebf4b7ddb964..9febc8b73f09efaaaac9d6fb8d2776f2148aed89 100644 (file)
@@ -58,16 +58,56 @@ static long qxl_fence_wait(struct dma_fence *fence, bool intr,
                           signed long timeout)
 {
        struct qxl_device *qdev;
+       struct qxl_release *release;
+       int count = 0, sc = 0;
+       bool have_drawable_releases;
        unsigned long cur, end = jiffies + timeout;
 
        qdev = container_of(fence->lock, struct qxl_device, release_lock);
+       release = container_of(fence, struct qxl_release, base);
+       have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
 
-       if (!wait_event_timeout(qdev->release_event,
-                               (dma_fence_is_signaled(fence) ||
-                                (qxl_io_notify_oom(qdev), 0)),
-                               timeout))
-               return 0;
+retry:
+       sc++;
+
+       if (dma_fence_is_signaled(fence))
+               goto signaled;
+
+       qxl_io_notify_oom(qdev);
+
+       for (count = 0; count < 11; count++) {
+               if (!qxl_queue_garbage_collect(qdev, true))
+                       break;
+
+               if (dma_fence_is_signaled(fence))
+                       goto signaled;
+       }
+
+       if (dma_fence_is_signaled(fence))
+               goto signaled;
+
+       if (have_drawable_releases || sc < 4) {
+               if (sc > 2)
+                       /* back off */
+                       usleep_range(500, 1000);
+
+               if (time_after(jiffies, end))
+                       return 0;
+
+               if (have_drawable_releases && sc > 300) {
+                       DMA_FENCE_WARN(fence,
+                                      "failed to wait on release %llu after spincount %d\n",
+                                      fence->context & ~0xf0000000, sc);
+                       goto signaled;
+               }
+               goto retry;
+       }
+       /*
+        * yeah, original sync_obj_wait gave up after 3 spins when
+        * have_drawable_releases is not set.
+        */
 
+signaled:
        cur = jiffies;
        if (time_after(cur, end))
                return 0;
index 94947229888ba7888aa6992116af8ab985219dbe..b7f22597ee95e798bb104894052997e332c298c6 100644 (file)
@@ -424,7 +424,7 @@ typedef struct _ATOM_PPLIB_SUMO_CLOCK_INFO{
 typedef struct _ATOM_PPLIB_STATE_V2
 {
       //number of valid dpm levels in this state; Driver uses it to calculate the whole 
-      //size of the state: sizeof(ATOM_PPLIB_STATE_V2) + (ucNumDPMLevels - 1) * sizeof(UCHAR)
+      //size of the state: struct_size(ATOM_PPLIB_STATE_V2, clockInfoIndex, ucNumDPMLevels)
       UCHAR ucNumDPMLevels;
       
       //a index to the array of nonClockInfos
@@ -432,14 +432,14 @@ typedef struct _ATOM_PPLIB_STATE_V2
       /**
       * Driver will read the first ucNumDPMLevels in this array
       */
-      UCHAR clockInfoIndex[1];
+      UCHAR clockInfoIndex[] __counted_by(ucNumDPMLevels);
 } ATOM_PPLIB_STATE_V2;
 
 typedef struct _StateArray{
     //how many states we have 
     UCHAR ucNumEntries;
     
-    ATOM_PPLIB_STATE_V2 states[1];
+    ATOM_PPLIB_STATE_V2 states[] __counted_by(ucNumEntries);
 }StateArray;
 
 
@@ -450,7 +450,7 @@ typedef struct _ClockInfoArray{
     //sizeof(ATOM_PPLIB_CLOCK_INFO)
     UCHAR ucEntrySize;
     
-    UCHAR clockInfo[1];
+    UCHAR clockInfo[] __counted_by(ucNumEntries);
 }ClockInfoArray;
 
 typedef struct _NonClockInfoArray{
@@ -460,7 +460,7 @@ typedef struct _NonClockInfoArray{
     //sizeof(ATOM_PPLIB_NONCLOCK_INFO)
     UCHAR ucEntrySize;
     
-    ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[1];
+    ATOM_PPLIB_NONCLOCK_INFO nonClockInfo[] __counted_by(ucNumEntries);
 }NonClockInfoArray;
 
 typedef struct _ATOM_PPLIB_Clock_Voltage_Dependency_Record
index bb1f0a3371ab5de484a81ad040347c9a5a8d4e76..10793a433bf58697fcdfce8e850ebfdd55ec7284 100644 (file)
@@ -923,8 +923,12 @@ bool radeon_get_atom_connector_info_from_supported_devices_table(struct
                max_device = ATOM_MAX_SUPPORTED_DEVICE_INFO;
 
        for (i = 0; i < max_device; i++) {
-               ATOM_CONNECTOR_INFO_I2C ci =
-                   supported_devices->info.asConnInfo[i];
+               ATOM_CONNECTOR_INFO_I2C ci;
+
+               if (frev > 1)
+                       ci = supported_devices->info_2d1.asConnInfo[i];
+               else
+                       ci = supported_devices->info.asConnInfo[i];
 
                bios_connectors[i].valid = false;
 
index 112438d965ffbefd4fa2cce5f246cc03a63759f9..6e1fd6985ffcb730eb7057c4509aec971dfa8266 100644 (file)
@@ -288,17 +288,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
                                                  enum ttm_caching caching,
                                                  unsigned int order)
 {
-       if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
+       if (pool->use_dma_alloc)
                return &pool->caching[caching].orders[order];
 
 #ifdef CONFIG_X86
        switch (caching) {
        case ttm_write_combined:
+               if (pool->nid != NUMA_NO_NODE)
+                       return &pool->caching[caching].orders[order];
+
                if (pool->use_dma32)
                        return &global_dma32_write_combined[order];
 
                return &global_write_combined[order];
        case ttm_uncached:
+               if (pool->nid != NUMA_NO_NODE)
+                       return &pool->caching[caching].orders[order];
+
                if (pool->use_dma32)
                        return &global_dma32_uncached[order];
 
@@ -566,11 +572,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
        pool->use_dma_alloc = use_dma_alloc;
        pool->use_dma32 = use_dma32;
 
-       if (use_dma_alloc || nid != NUMA_NO_NODE) {
-               for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-                       for (j = 0; j < NR_PAGE_ORDERS; ++j)
-                               ttm_pool_type_init(&pool->caching[i].orders[j],
-                                                  pool, i, j);
+       for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+               for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+                       struct ttm_pool_type *pt;
+
+                       /* Initialize only pool types which are actually used */
+                       pt = ttm_pool_select_type(pool, i, j);
+                       if (pt != &pool->caching[i].orders[j])
+                               continue;
+
+                       ttm_pool_type_init(pt, pool, i, j);
+               }
        }
 }
 EXPORT_SYMBOL(ttm_pool_init);
@@ -599,10 +611,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
 {
        unsigned int i, j;
 
-       if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
-               for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
-                       for (j = 0; j < NR_PAGE_ORDERS; ++j)
-                               ttm_pool_type_fini(&pool->caching[i].orders[j]);
+       for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
+               for (j = 0; j < NR_PAGE_ORDERS; ++j) {
+                       struct ttm_pool_type *pt;
+
+                       pt = ttm_pool_select_type(pool, i, j);
+                       if (pt != &pool->caching[i].orders[j])
+                               continue;
+
+                       ttm_pool_type_fini(pt);
+               }
        }
 
        /* We removed the pool types from the LRU, but we need to also make sure
index 2e04f6cb661e4f42eeaaef3c5d7fcdaee501d457..ce6b2fb341d1f8a85bab6f0ed19fd3ccde39757b 100644 (file)
@@ -105,7 +105,6 @@ v3d_irq(int irq, void *arg)
                struct v3d_file_priv *file = v3d->bin_job->base.file->driver_priv;
                u64 runtime = local_clock() - file->start_ns[V3D_BIN];
 
-               file->enabled_ns[V3D_BIN] += local_clock() - file->start_ns[V3D_BIN];
                file->jobs_sent[V3D_BIN]++;
                v3d->queue[V3D_BIN].jobs_sent++;
 
@@ -126,7 +125,6 @@ v3d_irq(int irq, void *arg)
                struct v3d_file_priv *file = v3d->render_job->base.file->driver_priv;
                u64 runtime = local_clock() - file->start_ns[V3D_RENDER];
 
-               file->enabled_ns[V3D_RENDER] += local_clock() - file->start_ns[V3D_RENDER];
                file->jobs_sent[V3D_RENDER]++;
                v3d->queue[V3D_RENDER].jobs_sent++;
 
@@ -147,7 +145,6 @@ v3d_irq(int irq, void *arg)
                struct v3d_file_priv *file = v3d->csd_job->base.file->driver_priv;
                u64 runtime = local_clock() - file->start_ns[V3D_CSD];
 
-               file->enabled_ns[V3D_CSD] += local_clock() - file->start_ns[V3D_CSD];
                file->jobs_sent[V3D_CSD]++;
                v3d->queue[V3D_CSD].jobs_sent++;
 
@@ -195,7 +192,6 @@ v3d_hub_irq(int irq, void *arg)
                struct v3d_file_priv *file = v3d->tfu_job->base.file->driver_priv;
                u64 runtime = local_clock() - file->start_ns[V3D_TFU];
 
-               file->enabled_ns[V3D_TFU] += local_clock() - file->start_ns[V3D_TFU];
                file->jobs_sent[V3D_TFU]++;
                v3d->queue[V3D_TFU].jobs_sent++;
 
index c52c7bf1485b1fa95b1e9ca3f1e05135167a8c5c..717d624e9a052298d5d5070551e909cc65ee0cc5 100644 (file)
@@ -456,8 +456,10 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
                .no_wait_gpu = false
        };
        u32 j, initial_line = dst_offset / dst_stride;
-       struct vmw_bo_blit_line_data d;
+       struct vmw_bo_blit_line_data d = {0};
        int ret = 0;
+       struct page **dst_pages = NULL;
+       struct page **src_pages = NULL;
 
        /* Buffer objects need to be either pinned or reserved: */
        if (!(dst->pin_count))
@@ -477,12 +479,35 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst,
                        return ret;
        }
 
+       if (!src->ttm->pages && src->ttm->sg) {
+               src_pages = kvmalloc_array(src->ttm->num_pages,
+                                          sizeof(struct page *), GFP_KERNEL);
+               if (!src_pages)
+                       return -ENOMEM;
+               ret = drm_prime_sg_to_page_array(src->ttm->sg, src_pages,
+                                                src->ttm->num_pages);
+               if (ret)
+                       goto out;
+       }
+       if (!dst->ttm->pages && dst->ttm->sg) {
+               dst_pages = kvmalloc_array(dst->ttm->num_pages,
+                                          sizeof(struct page *), GFP_KERNEL);
+               if (!dst_pages) {
+                       ret = -ENOMEM;
+                       goto out;
+               }
+               ret = drm_prime_sg_to_page_array(dst->ttm->sg, dst_pages,
+                                                dst->ttm->num_pages);
+               if (ret)
+                       goto out;
+       }
+
        d.mapped_dst = 0;
        d.mapped_src = 0;
        d.dst_addr = NULL;
        d.src_addr = NULL;
-       d.dst_pages = dst->ttm->pages;
-       d.src_pages = src->ttm->pages;
+       d.dst_pages = dst->ttm->pages ? dst->ttm->pages : dst_pages;
+       d.src_pages = src->ttm->pages ? src->ttm->pages : src_pages;
        d.dst_num_pages = PFN_UP(dst->resource->size);
        d.src_num_pages = PFN_UP(src->resource->size);
        d.dst_prot = ttm_io_prot(dst, dst->resource, PAGE_KERNEL);
@@ -504,6 +529,10 @@ out:
                kunmap_atomic(d.src_addr);
        if (d.dst_addr)
                kunmap_atomic(d.dst_addr);
+       if (src_pages)
+               kvfree(src_pages);
+       if (dst_pages)
+               kvfree(dst_pages);
 
        return ret;
 }
index bfd41ce3c8f4fca1f5a659e4513e08f72ea95966..e5eb21a471a6010aa956c811522956f27b99a096 100644 (file)
@@ -377,7 +377,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
 {
        struct ttm_operation_ctx ctx = {
                .interruptible = params->bo_type != ttm_bo_type_kernel,
-               .no_wait_gpu = false
+               .no_wait_gpu = false,
+               .resv = params->resv,
        };
        struct ttm_device *bdev = &dev_priv->bdev;
        struct drm_device *vdev = &dev_priv->drm;
@@ -394,8 +395,8 @@ static int vmw_bo_init(struct vmw_private *dev_priv,
 
        vmw_bo_placement_set(vmw_bo, params->domain, params->busy_domain);
        ret = ttm_bo_init_reserved(bdev, &vmw_bo->tbo, params->bo_type,
-                                  &vmw_bo->placement, 0, &ctx, NULL,
-                                  NULL, destroy);
+                                  &vmw_bo->placement, 0, &ctx,
+                                  params->sg, params->resv, destroy);
        if (unlikely(ret))
                return ret;
 
index 0d496dc9c6af7a352c0432f50f4dd9be37448b5e..f349642e6190d6933031d08ccd7f353231f0f1da 100644 (file)
@@ -55,6 +55,8 @@ struct vmw_bo_params {
        enum ttm_bo_type bo_type;
        size_t size;
        bool pin;
+       struct dma_resv *resv;
+       struct sg_table *sg;
 };
 
 /**
index c7d90f96d16a67beddf9395cf1ad611cb6f1cf34..58fb40c93100a84ec8b1dd769f35ab31c00bd0dc 100644 (file)
@@ -666,11 +666,12 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
                [vmw_dma_map_populate] = "Caching DMA mappings.",
                [vmw_dma_map_bind] = "Giving up DMA mappings early."};
 
-       /* TTM currently doesn't fully support SEV encryption. */
-       if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
-               return -EINVAL;
-
-       if (vmw_force_coherent)
+       /*
+        * When running with SEV we always want dma mappings, because
+        * otherwise ttm tt pool pages will bounce through swiotlb running
+        * out of available space.
+        */
+       if (vmw_force_coherent || cc_platform_has(CC_ATTR_MEM_ENCRYPT))
                dev_priv->map_mode = vmw_dma_alloc_coherent;
        else if (vmw_restrict_iommu)
                dev_priv->map_mode = vmw_dma_map_bind;
@@ -1627,6 +1628,7 @@ static const struct drm_driver driver = {
 
        .prime_fd_to_handle = vmw_prime_fd_to_handle,
        .prime_handle_to_fd = vmw_prime_handle_to_fd,
+       .gem_prime_import_sg_table = vmw_prime_import_sg_table,
 
        .fops = &vmwgfx_driver_fops,
        .name = VMWGFX_DRIVER_NAME,
index 12efecc17df664968056906da40cdd46b5665ddf..b019a1a1787af59e3fca37f560db905d7b2b6ab0 100644 (file)
@@ -1130,6 +1130,9 @@ extern int vmw_prime_handle_to_fd(struct drm_device *dev,
                                  struct drm_file *file_priv,
                                  uint32_t handle, uint32_t flags,
                                  int *prime_fd);
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+                                                struct dma_buf_attachment *attach,
+                                                struct sg_table *table);
 
 /*
  * MemoryOBject management -  vmwgfx_mob.c
index 12787bb9c111d10db997b9db650c6bb1069c26ef..d6bcaf078b1f40bbf75bdfb63fd1e00b7901e20f 100644 (file)
@@ -149,6 +149,38 @@ out_no_bo:
        return ret;
 }
 
+struct drm_gem_object *vmw_prime_import_sg_table(struct drm_device *dev,
+                                                struct dma_buf_attachment *attach,
+                                                struct sg_table *table)
+{
+       int ret;
+       struct vmw_private *dev_priv = vmw_priv(dev);
+       struct drm_gem_object *gem = NULL;
+       struct vmw_bo *vbo;
+       struct vmw_bo_params params = {
+               .domain = (dev_priv->has_mob) ? VMW_BO_DOMAIN_SYS : VMW_BO_DOMAIN_VRAM,
+               .busy_domain = VMW_BO_DOMAIN_SYS,
+               .bo_type = ttm_bo_type_sg,
+               .size = attach->dmabuf->size,
+               .pin = false,
+               .resv = attach->dmabuf->resv,
+               .sg = table,
+
+       };
+
+       dma_resv_lock(params.resv, NULL);
+
+       ret = vmw_bo_create(dev_priv, &params, &vbo);
+       if (ret != 0)
+               goto out_no_bo;
+
+       vbo->tbo.base.funcs = &vmw_gem_object_funcs;
+
+       gem = &vbo->tbo.base;
+out_no_bo:
+       dma_resv_unlock(params.resv);
+       return gem;
+}
 
 int vmw_gem_object_create_ioctl(struct drm_device *dev, void *data,
                                struct drm_file *filp)
index cd4925346ed45a1c10ae4e9d9b13c0066c71f168..84ae4e10a2ebec20c52a7eb42ea5455a0d22dfa5 100644 (file)
@@ -933,6 +933,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane,
 int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
                             struct drm_atomic_state *state)
 {
+       struct vmw_private *vmw = vmw_priv(crtc->dev);
        struct drm_crtc_state *new_state = drm_atomic_get_new_crtc_state(state,
                                                                         crtc);
        struct vmw_display_unit *du = vmw_crtc_to_du(new_state->crtc);
@@ -940,9 +941,13 @@ int vmw_du_crtc_atomic_check(struct drm_crtc *crtc,
        bool has_primary = new_state->plane_mask &
                           drm_plane_mask(crtc->primary);
 
-       /* We always want to have an active plane with an active CRTC */
-       if (has_primary != new_state->enable)
-               return -EINVAL;
+       /*
+        * This is fine in general, but broken userspace might expect
+        * some actual rendering so give a clue as why it's blank.
+        */
+       if (new_state->enable && !has_primary)
+               drm_dbg_driver(&vmw->drm,
+                              "CRTC without a primary plane will be blank.\n");
 
 
        if (new_state->connector_mask != connector_mask &&
index a94947b588e85f2c764aab60e11a84e59dd2a2ea..19a843da87b789b62279ecb9dccb8b2ddb19fe2f 100644 (file)
@@ -243,10 +243,10 @@ struct vmw_framebuffer_bo {
 
 
 static const uint32_t __maybe_unused vmw_primary_plane_formats[] = {
-       DRM_FORMAT_XRGB1555,
-       DRM_FORMAT_RGB565,
        DRM_FORMAT_XRGB8888,
        DRM_FORMAT_ARGB8888,
+       DRM_FORMAT_RGB565,
+       DRM_FORMAT_XRGB1555,
 };
 
 static const uint32_t __maybe_unused vmw_cursor_plane_formats[] = {
index 2d72a5ee7c0c710339d5d25c0a9376745a90f7af..c99cad444991579f6e665453b74f56cb35de2e15 100644 (file)
@@ -75,8 +75,12 @@ int vmw_prime_fd_to_handle(struct drm_device *dev,
                           int fd, u32 *handle)
 {
        struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+       int ret = ttm_prime_fd_to_handle(tfile, fd, handle);
 
-       return ttm_prime_fd_to_handle(tfile, fd, handle);
+       if (ret)
+               ret = drm_gem_prime_fd_to_handle(dev, file_priv, fd, handle);
+
+       return ret;
 }
 
 int vmw_prime_handle_to_fd(struct drm_device *dev,
@@ -85,5 +89,12 @@ int vmw_prime_handle_to_fd(struct drm_device *dev,
                           int *prime_fd)
 {
        struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
-       return ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+       int ret;
+
+       if (handle > VMWGFX_NUM_MOB)
+               ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd);
+       else
+               ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd);
+
+       return ret;
 }
index 4d23d0a70bcb7ef4901e9128b84cd7112ae8913a..621d98b376bbbc4b40cef6b9c6759b975610dd56 100644 (file)
@@ -188,13 +188,18 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
        switch (dev_priv->map_mode) {
        case vmw_dma_map_bind:
        case vmw_dma_map_populate:
-               vsgt->sgt = &vmw_tt->sgt;
-               ret = sg_alloc_table_from_pages_segment(
-                       &vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
-                       (unsigned long)vsgt->num_pages << PAGE_SHIFT,
-                       dma_get_max_seg_size(dev_priv->drm.dev), GFP_KERNEL);
-               if (ret)
-                       goto out_sg_alloc_fail;
+               if (vmw_tt->dma_ttm.page_flags  & TTM_TT_FLAG_EXTERNAL) {
+                       vsgt->sgt = vmw_tt->dma_ttm.sg;
+               } else {
+                       vsgt->sgt = &vmw_tt->sgt;
+                       ret = sg_alloc_table_from_pages_segment(&vmw_tt->sgt,
+                               vsgt->pages, vsgt->num_pages, 0,
+                               (unsigned long)vsgt->num_pages << PAGE_SHIFT,
+                               dma_get_max_seg_size(dev_priv->drm.dev),
+                               GFP_KERNEL);
+                       if (ret)
+                               goto out_sg_alloc_fail;
+               }
 
                ret = vmw_ttm_map_for_dma(vmw_tt);
                if (unlikely(ret != 0))
@@ -209,8 +214,9 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
        return 0;
 
 out_map_fail:
-       sg_free_table(vmw_tt->vsgt.sgt);
-       vmw_tt->vsgt.sgt = NULL;
+       drm_warn(&dev_priv->drm, "VSG table map failed!");
+       sg_free_table(vsgt->sgt);
+       vsgt->sgt = NULL;
 out_sg_alloc_fail:
        return ret;
 }
@@ -356,15 +362,17 @@ static void vmw_ttm_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
 static int vmw_ttm_populate(struct ttm_device *bdev,
                            struct ttm_tt *ttm, struct ttm_operation_ctx *ctx)
 {
-       int ret;
+       bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
 
-       /* TODO: maybe completely drop this ? */
        if (ttm_tt_is_populated(ttm))
                return 0;
 
-       ret = ttm_pool_alloc(&bdev->pool, ttm, ctx);
+       if (external && ttm->sg)
+               return  drm_prime_sg_to_dma_addr_array(ttm->sg,
+                                                      ttm->dma_address,
+                                                      ttm->num_pages);
 
-       return ret;
+       return ttm_pool_alloc(&bdev->pool, ttm, ctx);
 }
 
 static void vmw_ttm_unpopulate(struct ttm_device *bdev,
@@ -372,6 +380,10 @@ static void vmw_ttm_unpopulate(struct ttm_device *bdev,
 {
        struct vmw_ttm_tt *vmw_tt = container_of(ttm, struct vmw_ttm_tt,
                                                 dma_ttm);
+       bool external = (ttm->page_flags & TTM_TT_FLAG_EXTERNAL) != 0;
+
+       if (external)
+               return;
 
        vmw_ttm_unbind(bdev, ttm);
 
@@ -390,6 +402,7 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
 {
        struct vmw_ttm_tt *vmw_be;
        int ret;
+       bool external = bo->type == ttm_bo_type_sg;
 
        vmw_be = kzalloc(sizeof(*vmw_be), GFP_KERNEL);
        if (!vmw_be)
@@ -398,7 +411,10 @@ static struct ttm_tt *vmw_ttm_tt_create(struct ttm_buffer_object *bo,
        vmw_be->dev_priv = vmw_priv_from_ttm(bo->bdev);
        vmw_be->mob = NULL;
 
-       if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent)
+       if (external)
+               page_flags |= TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+
+       if (vmw_be->dev_priv->map_mode == vmw_dma_alloc_coherent || external)
                ret = ttm_sg_tt_init(&vmw_be->dma_ttm, bo, page_flags,
                                     ttm_cached);
        else
index b21da7b745a5e7cd6b3e34e4fb8d42a45b2b6466..a9c1f9885c6bb4d2727cbce81d5be93cb9458a38 100644 (file)
@@ -31,7 +31,7 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
 
        ret = ttm_bo_reserve(&bo->ttm, true, false, NULL);
        if (ret)
-               return ret;
+               goto err;
 
        if (!(bo->flags & XE_BO_SCANOUT_BIT)) {
                /*
@@ -42,12 +42,16 @@ int intel_fb_bo_framebuffer_init(struct intel_framebuffer *intel_fb,
                 */
                if (XE_IOCTL_DBG(i915, !list_empty(&bo->ttm.base.gpuva.list))) {
                        ttm_bo_unreserve(&bo->ttm);
-                       return -EINVAL;
+                       ret = -EINVAL;
+                       goto err;
                }
                bo->flags |= XE_BO_SCANOUT_BIT;
        }
        ttm_bo_unreserve(&bo->ttm);
+       return 0;
 
+err:
+       xe_bo_put(bo);
        return ret;
 }
 
index e4db069f0db3f1fd27ed80eb84fc4544ea0831df..6ec375c1c4b6c05aed07ba8432214b3de270c56e 100644 (file)
@@ -108,11 +108,6 @@ int xe_display_create(struct xe_device *xe)
        xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
 
        drmm_mutex_init(&xe->drm, &xe->sb_lock);
-       drmm_mutex_init(&xe->drm, &xe->display.backlight.lock);
-       drmm_mutex_init(&xe->drm, &xe->display.audio.mutex);
-       drmm_mutex_init(&xe->drm, &xe->display.wm.wm_mutex);
-       drmm_mutex_init(&xe->drm, &xe->display.pps.mutex);
-       drmm_mutex_init(&xe->drm, &xe->display.hdcp.hdcp_mutex);
        xe->enabled_irq_mask = ~0;
 
        err = drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
index 0b1266c88a6af39cba103e3447697c0540c0cc0d..deddc8be48c0af2133969c7452d12cd2e104f291 100644 (file)
 #define RING_EXECLIST_STATUS_LO(base)          XE_REG((base) + 0x234)
 #define RING_EXECLIST_STATUS_HI(base)          XE_REG((base) + 0x234 + 4)
 
-#define RING_CONTEXT_CONTROL(base)             XE_REG((base) + 0x244)
+#define RING_CONTEXT_CONTROL(base)             XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
 #define          CTX_CTRL_INHIBIT_SYN_CTX_SWITCH       REG_BIT(3)
 #define          CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT   REG_BIT(0)
 
index b82233a4160624d2d3dad941327bf7ecff5a3382..9ac7fbe201b3c22fa25959f98af87d453a962a17 100644 (file)
@@ -290,7 +290,7 @@ xe_hwmon_power1_max_interval_show(struct device *dev, struct device_attribute *a
         * As y can be < 2, we compute tau4 = (4 | x) << y
         * and then add 2 when doing the final right shift to account for units
         */
-       tau4 = ((1 << x_w) | x) << y;
+       tau4 = (u64)((1 << x_w) | x) << y;
 
        /* val in hwmon interface units (millisec) */
        out = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
@@ -330,7 +330,7 @@ xe_hwmon_power1_max_interval_store(struct device *dev, struct device_attribute *
        r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
        x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
        y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
-       tau4 = ((1 << x_w) | x) << y;
+       tau4 = (u64)((1 << x_w) | x) << y;
        max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
 
        if (val > max_win)
index 1426febe86eb676305772d7ee444b70af8254848..57066faf575eec7edebf335da434b6c1615d935f 100644 (file)
@@ -525,9 +525,8 @@ static const u8 *reg_offsets(struct xe_device *xe, enum xe_engine_class class)
 
 static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
 {
-       regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH) |
-                                   _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT) |
-                                   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT;
+       regs[CTX_CONTEXT_CONTROL] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
+                                                      CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 
        /* TODO: Timestamp */
 }
index ee1bb938c493487415445cd41c8b771080464522..2ba4fb9511f63fa894796dec90c89963a3dae1b0 100644 (file)
@@ -227,7 +227,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
                if (vm->flags & XE_VM_FLAG_64K && level == 1)
                        flags = XE_PDE_64K;
 
-               entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (level - 1) *
+               entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) *
                                                  XE_PAGE_SIZE, pat_index);
                xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
                          entry | flags);
@@ -235,7 +235,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 
        /* Write PDE's that point to our BO. */
        for (i = 0; i < num_entries - num_level; i++) {
-               entry = vm->pt_ops->pde_encode_bo(bo, i * XE_PAGE_SIZE,
+               entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
                                                  pat_index);
 
                xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
@@ -291,7 +291,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
 #define VM_SA_UPDATE_UNIT_SIZE         (XE_PAGE_SIZE / NUM_VMUSA_UNIT_PER_PAGE)
 #define NUM_VMUSA_WRITES_PER_UNIT      (VM_SA_UPDATE_UNIT_SIZE / sizeof(u64))
        drm_suballoc_manager_init(&m->vm_update_sa,
-                                 (map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
+                                 (size_t)(map_ofs / XE_PAGE_SIZE - NUM_KERNEL_PDE) *
                                  NUM_VMUSA_UNIT_PER_PAGE, 0);
 
        m->pt_bo = bo;
@@ -490,7 +490,7 @@ static void emit_pte(struct xe_migrate *m,
        struct xe_vm *vm = m->q->vm;
        u16 pat_index;
        u32 ptes;
-       u64 ofs = at_pt * XE_PAGE_SIZE;
+       u64 ofs = (u64)at_pt * XE_PAGE_SIZE;
        u64 cur_ofs;
 
        /* Indirect access needs compression enabled uncached PAT index */
index 62d1ef8867a84351ae7444d63113d8867dfbb0c5..3d4c8f342e215ed39263ba5c4c01079072dfcbbd 100644 (file)
@@ -1577,6 +1577,16 @@ void xe_vm_close_and_put(struct xe_vm *vm)
                xe->usm.num_vm_in_fault_mode--;
        else if (!(vm->flags & XE_VM_FLAG_MIGRATION))
                xe->usm.num_vm_in_non_fault_mode--;
+
+       if (vm->usm.asid) {
+               void *lookup;
+
+               xe_assert(xe, xe->info.has_asid);
+               xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION));
+
+               lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
+               xe_assert(xe, lookup == vm);
+       }
        mutex_unlock(&xe->usm.lock);
 
        for_each_tile(tile, xe, id)
@@ -1592,24 +1602,15 @@ static void vm_destroy_work_func(struct work_struct *w)
        struct xe_device *xe = vm->xe;
        struct xe_tile *tile;
        u8 id;
-       void *lookup;
 
        /* xe_vm_close_and_put was not called? */
        xe_assert(xe, !vm->size);
 
        mutex_destroy(&vm->snap_mutex);
 
-       if (!(vm->flags & XE_VM_FLAG_MIGRATION)) {
+       if (!(vm->flags & XE_VM_FLAG_MIGRATION))
                xe_device_mem_access_put(xe);
 
-               if (xe->info.has_asid && vm->usm.asid) {
-                       mutex_lock(&xe->usm.lock);
-                       lookup = xa_erase(&xe->usm.asid_to_vm, vm->usm.asid);
-                       xe_assert(xe, lookup == vm);
-                       mutex_unlock(&xe->usm.lock);
-               }
-       }
-
        for_each_tile(tile, xe, id)
                XE_WARN_ON(vm->pt_root[id]);
 
index 783975d1384fc4d8e780cb2cdf450b5bab8b55de..7c52757a89db9abde6fb211178b9cedb4b1c7740 100644 (file)
@@ -351,11 +351,6 @@ static int host1x_device_uevent(const struct device *dev,
        return 0;
 }
 
-static int host1x_dma_configure(struct device *dev)
-{
-       return of_dma_configure(dev, dev->of_node, true);
-}
-
 static const struct dev_pm_ops host1x_device_pm_ops = {
        .suspend = pm_generic_suspend,
        .resume = pm_generic_resume,
@@ -369,7 +364,6 @@ const struct bus_type host1x_bus_type = {
        .name = "host1x",
        .match = host1x_device_match,
        .uevent = host1x_device_uevent,
-       .dma_configure = host1x_dma_configure,
        .pm = &host1x_device_pm_ops,
 };
 
@@ -458,8 +452,6 @@ static int host1x_device_add(struct host1x *host1x,
        device->dev.bus = &host1x_bus_type;
        device->dev.parent = host1x->dev;
 
-       of_dma_configure(&device->dev, host1x->dev->of_node, true);
-
        device->dev.dma_parms = &device->dma_parms;
        dma_set_max_seg_size(&device->dev, UINT_MAX);
 
index adbf674355b2b8a472c03bd60092960cb0c742cf..fb8cd8469328ee094619c91eb227a04e24bf66cb 100644 (file)
@@ -153,7 +153,9 @@ void vmbus_free_ring(struct vmbus_channel *channel)
        hv_ringbuffer_cleanup(&channel->inbound);
 
        if (channel->ringbuffer_page) {
-               __free_pages(channel->ringbuffer_page,
+               /* In a CoCo VM leak the memory if it didn't get re-encrypted */
+               if (!channel->ringbuffer_gpadlhandle.decrypted)
+                       __free_pages(channel->ringbuffer_page,
                             get_order(channel->ringbuffer_pagecount
                                       << PAGE_SHIFT));
                channel->ringbuffer_page = NULL;
@@ -436,9 +438,18 @@ static int __vmbus_establish_gpadl(struct vmbus_channel *channel,
                (atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1);
 
        ret = create_gpadl_header(type, kbuffer, size, send_offset, &msginfo);
-       if (ret)
+       if (ret) {
+               gpadl->decrypted = false;
                return ret;
+       }
 
+       /*
+        * Set the "decrypted" flag to true for the set_memory_decrypted()
+        * success case. In the failure case, the encryption state of the
+        * memory is unknown. Leave "decrypted" as true to ensure the
+        * memory will be leaked instead of going back on the free list.
+        */
+       gpadl->decrypted = true;
        ret = set_memory_decrypted((unsigned long)kbuffer,
                                   PFN_UP(size));
        if (ret) {
@@ -527,9 +538,15 @@ cleanup:
 
        kfree(msginfo);
 
-       if (ret)
-               set_memory_encrypted((unsigned long)kbuffer,
-                                    PFN_UP(size));
+       if (ret) {
+               /*
+                * If set_memory_encrypted() fails, the decrypted flag is
+                * left as true so the memory is leaked instead of being
+                * put back on the free list.
+                */
+               if (!set_memory_encrypted((unsigned long)kbuffer, PFN_UP(size)))
+                       gpadl->decrypted = false;
+       }
 
        return ret;
 }
@@ -850,6 +867,8 @@ post_msg_err:
        if (ret)
                pr_warn("Fail to set mem host visibility in GPADL teardown %d.\n", ret);
 
+       gpadl->decrypted = ret;
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(vmbus_teardown_gpadl);
index 3cabeeabb1cacf0627b02110d6f4fc17abc7e4a0..f001ae880e1dbefc6243e6d902e529db43291987 100644 (file)
@@ -237,8 +237,17 @@ int vmbus_connect(void)
                                vmbus_connection.monitor_pages[0], 1);
        ret |= set_memory_decrypted((unsigned long)
                                vmbus_connection.monitor_pages[1], 1);
-       if (ret)
+       if (ret) {
+               /*
+                * If set_memory_decrypted() fails, the encryption state
+                * of the memory is unknown. So leak the memory instead
+                * of risking returning decrypted memory to the free list.
+                * For simplicity, always handle both pages the same.
+                */
+               vmbus_connection.monitor_pages[0] = NULL;
+               vmbus_connection.monitor_pages[1] = NULL;
                goto cleanup;
+       }
 
        /*
         * Set_memory_decrypted() will change the memory contents if
@@ -337,13 +346,19 @@ void vmbus_disconnect(void)
                vmbus_connection.int_page = NULL;
        }
 
-       set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
-       set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
+       if (vmbus_connection.monitor_pages[0]) {
+               if (!set_memory_encrypted(
+                       (unsigned long)vmbus_connection.monitor_pages[0], 1))
+                       hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
+               vmbus_connection.monitor_pages[0] = NULL;
+       }
 
-       hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
-       hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
-       vmbus_connection.monitor_pages[0] = NULL;
-       vmbus_connection.monitor_pages[1] = NULL;
+       if (vmbus_connection.monitor_pages[1]) {
+               if (!set_memory_encrypted(
+                       (unsigned long)vmbus_connection.monitor_pages[1], 1))
+                       hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
+               vmbus_connection.monitor_pages[1] = NULL;
+       }
 }
 
 /*
index 4cb17603a8289b259e64dc6a5be215cb1e1a8a57..12a707ab73f85cf363e6503346741a85bc9b82df 100644 (file)
@@ -131,7 +131,7 @@ static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
+       return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
 }
 static DEVICE_ATTR_RO(id);
 
@@ -142,7 +142,7 @@ static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n", hv_dev->channel->state);
+       return sysfs_emit(buf, "%d\n", hv_dev->channel->state);
 }
 static DEVICE_ATTR_RO(state);
 
@@ -153,7 +153,7 @@ static ssize_t monitor_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
+       return sysfs_emit(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
 }
 static DEVICE_ATTR_RO(monitor_id);
 
@@ -164,8 +164,8 @@ static ssize_t class_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "{%pUl}\n",
-                      &hv_dev->channel->offermsg.offer.if_type);
+       return sysfs_emit(buf, "{%pUl}\n",
+                         &hv_dev->channel->offermsg.offer.if_type);
 }
 static DEVICE_ATTR_RO(class_id);
 
@@ -176,8 +176,8 @@ static ssize_t device_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "{%pUl}\n",
-                      &hv_dev->channel->offermsg.offer.if_instance);
+       return sysfs_emit(buf, "{%pUl}\n",
+                         &hv_dev->channel->offermsg.offer.if_instance);
 }
 static DEVICE_ATTR_RO(device_id);
 
@@ -186,7 +186,7 @@ static ssize_t modalias_show(struct device *dev,
 {
        struct hv_device *hv_dev = device_to_hv_device(dev);
 
-       return sprintf(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
+       return sysfs_emit(buf, "vmbus:%*phN\n", UUID_SIZE, &hv_dev->dev_type);
 }
 static DEVICE_ATTR_RO(modalias);
 
@@ -199,7 +199,7 @@ static ssize_t numa_node_show(struct device *dev,
        if (!hv_dev->channel)
                return -ENODEV;
 
-       return sprintf(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
+       return sysfs_emit(buf, "%d\n", cpu_to_node(hv_dev->channel->target_cpu));
 }
 static DEVICE_ATTR_RO(numa_node);
 #endif
@@ -212,9 +212,8 @@ static ssize_t server_monitor_pending_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_pending(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[0]));
+       return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+                         vmbus_connection.monitor_pages[0]));
 }
 static DEVICE_ATTR_RO(server_monitor_pending);
 
@@ -226,9 +225,8 @@ static ssize_t client_monitor_pending_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_pending(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[1]));
+       return sysfs_emit(buf, "%d\n", channel_pending(hv_dev->channel,
+                         vmbus_connection.monitor_pages[1]));
 }
 static DEVICE_ATTR_RO(client_monitor_pending);
 
@@ -240,9 +238,8 @@ static ssize_t server_monitor_latency_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_latency(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[0]));
+       return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+                         vmbus_connection.monitor_pages[0]));
 }
 static DEVICE_ATTR_RO(server_monitor_latency);
 
@@ -254,9 +251,8 @@ static ssize_t client_monitor_latency_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_latency(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[1]));
+       return sysfs_emit(buf, "%d\n", channel_latency(hv_dev->channel,
+                         vmbus_connection.monitor_pages[1]));
 }
 static DEVICE_ATTR_RO(client_monitor_latency);
 
@@ -268,9 +264,8 @@ static ssize_t server_monitor_conn_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_conn_id(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[0]));
+       return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+                         vmbus_connection.monitor_pages[0]));
 }
 static DEVICE_ATTR_RO(server_monitor_conn_id);
 
@@ -282,9 +277,8 @@ static ssize_t client_monitor_conn_id_show(struct device *dev,
 
        if (!hv_dev->channel)
                return -ENODEV;
-       return sprintf(buf, "%d\n",
-                      channel_conn_id(hv_dev->channel,
-                                      vmbus_connection.monitor_pages[1]));
+       return sysfs_emit(buf, "%d\n", channel_conn_id(hv_dev->channel,
+                         vmbus_connection.monitor_pages[1]));
 }
 static DEVICE_ATTR_RO(client_monitor_conn_id);
 
@@ -303,7 +297,7 @@ static ssize_t out_intr_mask_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
+       return sysfs_emit(buf, "%d\n", outbound.current_interrupt_mask);
 }
 static DEVICE_ATTR_RO(out_intr_mask);
 
@@ -321,7 +315,7 @@ static ssize_t out_read_index_show(struct device *dev,
                                          &outbound);
        if (ret < 0)
                return ret;
-       return sprintf(buf, "%d\n", outbound.current_read_index);
+       return sysfs_emit(buf, "%d\n", outbound.current_read_index);
 }
 static DEVICE_ATTR_RO(out_read_index);
 
@@ -340,7 +334,7 @@ static ssize_t out_write_index_show(struct device *dev,
                                          &outbound);
        if (ret < 0)
                return ret;
-       return sprintf(buf, "%d\n", outbound.current_write_index);
+       return sysfs_emit(buf, "%d\n", outbound.current_write_index);
 }
 static DEVICE_ATTR_RO(out_write_index);
 
@@ -359,7 +353,7 @@ static ssize_t out_read_bytes_avail_show(struct device *dev,
                                          &outbound);
        if (ret < 0)
                return ret;
-       return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
+       return sysfs_emit(buf, "%d\n", outbound.bytes_avail_toread);
 }
 static DEVICE_ATTR_RO(out_read_bytes_avail);
 
@@ -378,7 +372,7 @@ static ssize_t out_write_bytes_avail_show(struct device *dev,
                                          &outbound);
        if (ret < 0)
                return ret;
-       return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
+       return sysfs_emit(buf, "%d\n", outbound.bytes_avail_towrite);
 }
 static DEVICE_ATTR_RO(out_write_bytes_avail);
 
@@ -396,7 +390,7 @@ static ssize_t in_intr_mask_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
+       return sysfs_emit(buf, "%d\n", inbound.current_interrupt_mask);
 }
 static DEVICE_ATTR_RO(in_intr_mask);
 
@@ -414,7 +408,7 @@ static ssize_t in_read_index_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.current_read_index);
+       return sysfs_emit(buf, "%d\n", inbound.current_read_index);
 }
 static DEVICE_ATTR_RO(in_read_index);
 
@@ -432,7 +426,7 @@ static ssize_t in_write_index_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.current_write_index);
+       return sysfs_emit(buf, "%d\n", inbound.current_write_index);
 }
 static DEVICE_ATTR_RO(in_write_index);
 
@@ -451,7 +445,7 @@ static ssize_t in_read_bytes_avail_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
+       return sysfs_emit(buf, "%d\n", inbound.bytes_avail_toread);
 }
 static DEVICE_ATTR_RO(in_read_bytes_avail);
 
@@ -470,7 +464,7 @@ static ssize_t in_write_bytes_avail_show(struct device *dev,
        if (ret < 0)
                return ret;
 
-       return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
+       return sysfs_emit(buf, "%d\n", inbound.bytes_avail_towrite);
 }
 static DEVICE_ATTR_RO(in_write_bytes_avail);
 
@@ -480,7 +474,7 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
 {
        struct hv_device *hv_dev = device_to_hv_device(dev);
        struct vmbus_channel *channel = hv_dev->channel, *cur_sc;
-       int buf_size = PAGE_SIZE, n_written, tot_written;
+       int n_written;
        struct list_head *cur;
 
        if (!channel)
@@ -488,25 +482,21 @@ static ssize_t channel_vp_mapping_show(struct device *dev,
 
        mutex_lock(&vmbus_connection.channel_mutex);
 
-       tot_written = snprintf(buf, buf_size, "%u:%u\n",
-               channel->offermsg.child_relid, channel->target_cpu);
+       n_written = sysfs_emit(buf, "%u:%u\n",
+                              channel->offermsg.child_relid,
+                              channel->target_cpu);
 
        list_for_each(cur, &channel->sc_list) {
-               if (tot_written >= buf_size - 1)
-                       break;
 
                cur_sc = list_entry(cur, struct vmbus_channel, sc_list);
-               n_written = scnprintf(buf + tot_written,
-                                    buf_size - tot_written,
-                                    "%u:%u\n",
-                                    cur_sc->offermsg.child_relid,
-                                    cur_sc->target_cpu);
-               tot_written += n_written;
+               n_written += sysfs_emit_at(buf, n_written, "%u:%u\n",
+                                         cur_sc->offermsg.child_relid,
+                                         cur_sc->target_cpu);
        }
 
        mutex_unlock(&vmbus_connection.channel_mutex);
 
-       return tot_written;
+       return n_written;
 }
 static DEVICE_ATTR_RO(channel_vp_mapping);
 
@@ -516,7 +506,7 @@ static ssize_t vendor_show(struct device *dev,
 {
        struct hv_device *hv_dev = device_to_hv_device(dev);
 
-       return sprintf(buf, "0x%x\n", hv_dev->vendor_id);
+       return sysfs_emit(buf, "0x%x\n", hv_dev->vendor_id);
 }
 static DEVICE_ATTR_RO(vendor);
 
@@ -526,7 +516,7 @@ static ssize_t device_show(struct device *dev,
 {
        struct hv_device *hv_dev = device_to_hv_device(dev);
 
-       return sprintf(buf, "0x%x\n", hv_dev->device_id);
+       return sysfs_emit(buf, "0x%x\n", hv_dev->device_id);
 }
 static DEVICE_ATTR_RO(device);
 
@@ -551,7 +541,7 @@ static ssize_t driver_override_show(struct device *dev,
        ssize_t len;
 
        device_lock(dev);
-       len = snprintf(buf, PAGE_SIZE, "%s\n", hv_dev->driver_override);
+       len = sysfs_emit(buf, "%s\n", hv_dev->driver_override);
        device_unlock(dev);
 
        return len;
index 33228c1c8980f32a5e8af323587601a4783b5b7f..ac6754a85f3507ee88bd3847359a53292a14dc9f 100644 (file)
@@ -3232,28 +3232,29 @@ static void iommu_snp_enable(void)
                return;
        /*
         * The SNP support requires that IOMMU must be enabled, and is
-        * not configured in the passthrough mode.
+        * configured with V1 page table (DTE[Mode] = 0 is not supported).
         */
        if (no_iommu || iommu_default_passthrough()) {
-               pr_err("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
-               cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
-               return;
+               pr_warn("SNP: IOMMU disabled or configured in passthrough mode, SNP cannot be supported.\n");
+               goto disable_snp;
+       }
+
+       if (amd_iommu_pgtable != AMD_IOMMU_V1) {
+               pr_warn("SNP: IOMMU is configured with V2 page table mode, SNP cannot be supported.\n");
+               goto disable_snp;
        }
 
        amd_iommu_snp_en = check_feature(FEATURE_SNP);
        if (!amd_iommu_snp_en) {
-               pr_err("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
-               cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
-               return;
+               pr_warn("SNP: IOMMU SNP feature not enabled, SNP cannot be supported.\n");
+               goto disable_snp;
        }
 
        pr_info("IOMMU SNP support enabled.\n");
+       return;
 
-       /* Enforce IOMMU v1 pagetable when SNP is enabled. */
-       if (amd_iommu_pgtable != AMD_IOMMU_V1) {
-               pr_warn("Forcing use of AMD IOMMU v1 page table due to SNP.\n");
-               amd_iommu_pgtable = AMD_IOMMU_V1;
-       }
+disable_snp:
+       cc_platform_clear(CC_ATTR_HOST_SEV_SNP);
 #endif
 }
 
index d35c1b8c8e65ce5a9c6f6ae3aae555d91eb4d3d0..e692217fcb28011478139d7dc146d74dcd9456e8 100644 (file)
@@ -1692,26 +1692,29 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
 
 static u16 domain_id_alloc(void)
 {
+       unsigned long flags;
        int id;
 
-       spin_lock(&pd_bitmap_lock);
+       spin_lock_irqsave(&pd_bitmap_lock, flags);
        id = find_first_zero_bit(amd_iommu_pd_alloc_bitmap, MAX_DOMAIN_ID);
        BUG_ON(id == 0);
        if (id > 0 && id < MAX_DOMAIN_ID)
                __set_bit(id, amd_iommu_pd_alloc_bitmap);
        else
                id = 0;
-       spin_unlock(&pd_bitmap_lock);
+       spin_unlock_irqrestore(&pd_bitmap_lock, flags);
 
        return id;
 }
 
 static void domain_id_free(int id)
 {
-       spin_lock(&pd_bitmap_lock);
+       unsigned long flags;
+
+       spin_lock_irqsave(&pd_bitmap_lock, flags);
        if (id > 0 && id < MAX_DOMAIN_ID)
                __clear_bit(id, amd_iommu_pd_alloc_bitmap);
-       spin_unlock(&pd_bitmap_lock);
+       spin_unlock_irqrestore(&pd_bitmap_lock, flags);
 }
 
 static void free_gcr3_tbl_level1(u64 *tbl)
index 50eb9aed47cc585e1307b3d0f47252b2edcdaeb0..a7ecd90303dc42f9fbe120e75f2053b1390c5445 100644 (file)
@@ -4299,9 +4299,11 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
        }
 
        dev_iommu_priv_set(dev, info);
-       ret = device_rbtree_insert(iommu, info);
-       if (ret)
-               goto free;
+       if (pdev && pci_ats_supported(pdev)) {
+               ret = device_rbtree_insert(iommu, info);
+               if (ret)
+                       goto free;
+       }
 
        if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
                ret = intel_pasid_alloc_table(dev);
@@ -4336,7 +4338,8 @@ static void intel_iommu_release_device(struct device *dev)
        struct intel_iommu *iommu = info->iommu;
 
        mutex_lock(&iommu->iopf_lock);
-       device_rbtree_remove(info);
+       if (dev_is_pci(dev) && pci_ats_supported(to_pci_dev(dev)))
+               device_rbtree_remove(info);
        mutex_unlock(&iommu->iopf_lock);
 
        if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
index cf43e798eca49936e79a20ea5397a6b0e9f1cc82..44083d01852dbf997f8cc4001f3b278ea5d7fa07 100644 (file)
@@ -438,7 +438,7 @@ static int iommu_pmu_assign_event(struct iommu_pmu *iommu_pmu,
        iommu_pmu_set_filter(domain, event->attr.config1,
                             IOMMU_PMU_FILTER_DOMAIN, idx,
                             event->attr.config1);
-       iommu_pmu_set_filter(pasid, event->attr.config1,
+       iommu_pmu_set_filter(pasid, event->attr.config2,
                             IOMMU_PMU_FILTER_PASID, idx,
                             event->attr.config1);
        iommu_pmu_set_filter(ats, event->attr.config2,
index c1bed89b102614adf6f71070080aa513729f4409..ee3b469e2da1551889ba0e200f386e010bc6f68f 100644 (file)
@@ -66,7 +66,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu)
        struct page *pages;
        int irq, ret;
 
-       pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
+       pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
        if (!pages) {
                pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
                        iommu->name);
index b8c47f18bc2612407cf58bb80bc041e27967d139..6a2707fe7a78c09d04f84a78d0b498d7a960d73d 100644 (file)
@@ -1790,6 +1790,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = {
        { .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data},
        {}
 };
+MODULE_DEVICE_TABLE(of, mtk_iommu_of_ids);
 
 static struct platform_driver mtk_iommu_driver = {
        .probe  = mtk_iommu_probe,
index a9fa2a54dc9b39a981ccc4e66f72eff5329de49e..d6e4002200bd33d6219ed09f1c90ccac0e3404e4 100644 (file)
@@ -600,6 +600,7 @@ static const struct of_device_id mtk_iommu_v1_of_ids[] = {
        { .compatible = "mediatek,mt2701-m4u", },
        {}
 };
+MODULE_DEVICE_TABLE(of, mtk_iommu_v1_of_ids);
 
 static const struct component_master_ops mtk_iommu_v1_com_ops = {
        .bind           = mtk_iommu_v1_bind,
index fca888b36680df813c952d8d29e1cf74cd81e167..2a537cbfcb077246c0aee43a5b9f1885a3e0b5f2 100644 (file)
@@ -786,6 +786,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
                                           struct its_cmd_block *cmd,
                                           struct its_cmd_desc *desc)
 {
+       struct its_vpe *vpe = valid_vpe(its, desc->its_vmapp_cmd.vpe);
        unsigned long vpt_addr, vconf_addr;
        u64 target;
        bool alloc;
@@ -798,6 +799,11 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
                if (is_v4_1(its)) {
                        alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count);
                        its_encode_alloc(cmd, alloc);
+                       /*
+                        * Unmapping a VPE is self-synchronizing on GICv4.1,
+                        * no need to issue a VSYNC.
+                        */
+                       vpe = NULL;
                }
 
                goto out;
@@ -832,7 +838,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
 out:
        its_fixup_cmd(cmd);
 
-       return valid_vpe(its, desc->its_vmapp_cmd.vpe);
+       return vpe;
 }
 
 static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
index 2776ca5fc33f39019062b3d9fb8f02547a5e4139..b215b28cad7b76a5764bda8021cece74ec5cd40f 100644 (file)
@@ -401,23 +401,23 @@ data_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 }
 
 static int data_sock_setsockopt(struct socket *sock, int level, int optname,
-                               sockptr_t optval, unsigned int len)
+                               sockptr_t optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
        int err = 0, opt = 0;
 
        if (*debug & DEBUG_SOCKET)
                printk(KERN_DEBUG "%s(%p, %d, %x, optval, %d)\n", __func__, sock,
-                      level, optname, len);
+                      level, optname, optlen);
 
        lock_sock(sk);
 
        switch (optname) {
        case MISDN_TIME_STAMP:
-               if (copy_from_sockptr(&opt, optval, sizeof(int))) {
-                       err = -EFAULT;
+               err = copy_safe_from_sockptr(&opt, sizeof(opt),
+                                            optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        _pms(sk)->cmask |= MISDN_TIME_STAMP;
index be8ac24f50b6ad651fd107f9af9a448bb1f7780a..7b8a71ca66dde0f4f6f3c2728107cb48cfcaa706 100644 (file)
@@ -1558,7 +1558,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
                for (j = 0; j < i; j++)
                        if (r1_bio->bios[j])
                                rdev_dec_pending(conf->mirrors[j].rdev, mddev);
-               free_r1bio(r1_bio);
+               mempool_free(r1_bio, &conf->r1bio_pool);
                allow_barrier(conf, bio->bi_iter.bi_sector);
 
                if (bio->bi_opf & REQ_NOWAIT) {
index 4c34344dc7dcb876e29d66358bcfcc79e1e77705..d7027d600208fc2f7233c5ca01ab7d590ef33042 100644 (file)
@@ -50,12 +50,12 @@ static void mtk_vcodec_vpu_reset_dec_handler(void *priv)
 
        dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
 
-       mutex_lock(&dev->dev_mutex);
+       mutex_lock(&dev->dev_ctx_lock);
        list_for_each_entry(ctx, &dev->ctx_list, list) {
                ctx->state = MTK_STATE_ABORT;
                mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
        }
-       mutex_unlock(&dev->dev_mutex);
+       mutex_unlock(&dev->dev_ctx_lock);
 }
 
 static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
@@ -65,12 +65,12 @@ static void mtk_vcodec_vpu_reset_enc_handler(void *priv)
 
        dev_err(&dev->plat_dev->dev, "Watchdog timeout!!");
 
-       mutex_lock(&dev->dev_mutex);
+       mutex_lock(&dev->dev_ctx_lock);
        list_for_each_entry(ctx, &dev->ctx_list, list) {
                ctx->state = MTK_STATE_ABORT;
                mtk_v4l2_vdec_dbg(0, ctx, "[%d] Change to state MTK_STATE_ABORT", ctx->id);
        }
-       mutex_unlock(&dev->dev_mutex);
+       mutex_unlock(&dev->dev_ctx_lock);
 }
 
 static const struct mtk_vcodec_fw_ops mtk_vcodec_vpu_msg = {
index f47c98faf068b6250de0c46a45efbca641a0e0ad..2073781ccadb156116b1cbe86c49b3e06b7a93f3 100644 (file)
@@ -268,7 +268,9 @@ static int fops_vcodec_open(struct file *file)
 
        ctx->dev->vdec_pdata->init_vdec_params(ctx);
 
+       mutex_lock(&dev->dev_ctx_lock);
        list_add(&ctx->list, &dev->ctx_list);
+       mutex_unlock(&dev->dev_ctx_lock);
        mtk_vcodec_dbgfs_create(ctx);
 
        mutex_unlock(&dev->dev_mutex);
@@ -311,7 +313,9 @@ static int fops_vcodec_release(struct file *file)
        v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
 
        mtk_vcodec_dbgfs_remove(dev, ctx->id);
+       mutex_lock(&dev->dev_ctx_lock);
        list_del_init(&ctx->list);
+       mutex_unlock(&dev->dev_ctx_lock);
        kfree(ctx);
        mutex_unlock(&dev->dev_mutex);
        return 0;
@@ -404,6 +408,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
        for (i = 0; i < MTK_VDEC_HW_MAX; i++)
                mutex_init(&dev->dec_mutex[i]);
        mutex_init(&dev->dev_mutex);
+       mutex_init(&dev->dev_ctx_lock);
        spin_lock_init(&dev->irqlock);
 
        snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
index 849b89dd205c21d686d7fcfc3624df79f99e4449..85b2c0d3d8bcdd3a59027ddccd1efeb4371292c9 100644 (file)
@@ -241,6 +241,7 @@ struct mtk_vcodec_dec_ctx {
  *
  * @dec_mutex: decoder hardware lock
  * @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
  * @decode_workqueue: decode work queue
  *
  * @irqlock: protect data access by irq handler and work thread
@@ -282,6 +283,7 @@ struct mtk_vcodec_dec_dev {
        /* decoder hardware mutex lock */
        struct mutex dec_mutex[MTK_VDEC_HW_MAX];
        struct mutex dev_mutex;
+       struct mutex dev_ctx_lock;
        struct workqueue_struct *decode_workqueue;
 
        spinlock_t irqlock;
index 06ed47df693bfd049fe5537abb6b994c1b740b85..21836dd6ef85a36f4bfc7e781f0a5b57f6c1962d 100644 (file)
@@ -869,7 +869,6 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
        inst->vpu.codec_type = ctx->current_codec;
        inst->vpu.capture_type = ctx->capture_fourcc;
 
-       ctx->drv_handle = inst;
        err = vpu_dec_init(&inst->vpu);
        if (err) {
                mtk_vdec_err(ctx, "vdec_hevc init err=%d", err);
@@ -898,6 +897,7 @@ static int vdec_hevc_slice_init(struct mtk_vcodec_dec_ctx *ctx)
        mtk_vdec_debug(ctx, "lat hevc instance >> %p, codec_type = 0x%x",
                       inst, inst->vpu.codec_type);
 
+       ctx->drv_handle = inst;
        return 0;
 error_free_inst:
        kfree(inst);
index 19407f9bc773c34445613ed8311fb86b1b565d38..987b3d71b662ac98495604e535f6ece7b733b8dd 100644 (file)
@@ -449,7 +449,7 @@ static int vdec_vp8_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
                       inst->frm_cnt, y_fb_dma, c_fb_dma, fb);
 
        inst->cur_fb = fb;
-       dec->bs_dma = (unsigned long)bs->dma_addr;
+       dec->bs_dma = (uint64_t)bs->dma_addr;
        dec->bs_sz = bs->size;
        dec->cur_y_fb_dma = y_fb_dma;
        dec->cur_c_fb_dma = c_fb_dma;
index 55355fa7009083cacba971e0e3f0981e09f80300..039082f600c813f8e703fd283843ee1bddbe31c8 100644 (file)
@@ -16,6 +16,7 @@
 #include "../vdec_drv_base.h"
 #include "../vdec_vpu_if.h"
 
+#define VP9_MAX_SUPER_FRAMES_NUM 8
 #define VP9_SUPER_FRAME_BS_SZ 64
 #define MAX_VP9_DPB_SIZE       9
 
@@ -133,11 +134,11 @@ struct vp9_sf_ref_fb {
  */
 struct vdec_vp9_vsi {
        unsigned char sf_bs_buf[VP9_SUPER_FRAME_BS_SZ];
-       struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_FRM_BUF_NUM-1];
+       struct vp9_sf_ref_fb sf_ref_fb[VP9_MAX_SUPER_FRAMES_NUM];
        int sf_next_ref_fb_idx;
        unsigned int sf_frm_cnt;
-       unsigned int sf_frm_offset[VP9_MAX_FRM_BUF_NUM-1];
-       unsigned int sf_frm_sz[VP9_MAX_FRM_BUF_NUM-1];
+       unsigned int sf_frm_offset[VP9_MAX_SUPER_FRAMES_NUM];
+       unsigned int sf_frm_sz[VP9_MAX_SUPER_FRAMES_NUM];
        unsigned int sf_frm_idx;
        unsigned int sf_init;
        struct vdec_fb fb;
@@ -526,7 +527,7 @@ static void vp9_swap_frm_bufs(struct vdec_vp9_inst *inst)
        /* if this super frame and it is not last sub-frame, get next fb for
         * sub-frame decode
         */
-       if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt - 1)
+       if (vsi->sf_frm_cnt > 0 && vsi->sf_frm_idx != vsi->sf_frm_cnt)
                vsi->sf_next_ref_fb_idx = vp9_get_sf_ref_fb(inst);
 }
 
@@ -735,7 +736,7 @@ static void get_free_fb(struct vdec_vp9_inst *inst, struct vdec_fb **out_fb)
 
 static int validate_vsi_array_indexes(struct vdec_vp9_inst *inst,
                struct vdec_vp9_vsi *vsi) {
-       if (vsi->sf_frm_idx >= VP9_MAX_FRM_BUF_NUM - 1) {
+       if (vsi->sf_frm_idx > VP9_MAX_SUPER_FRAMES_NUM) {
                mtk_vdec_err(inst->ctx, "Invalid vsi->sf_frm_idx=%u.", vsi->sf_frm_idx);
                return -EIO;
        }
index cf48d09b78d7a156440e1343448af946342d26e9..eea709d93820919d33d13184af7281fe9f0035fc 100644 (file)
@@ -1074,7 +1074,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
        unsigned int mi_row;
        unsigned int mi_col;
        unsigned int offset;
-       unsigned int pa;
+       dma_addr_t pa;
        unsigned int size;
        struct vdec_vp9_slice_tiles *tiles;
        unsigned char *pos;
@@ -1109,7 +1109,7 @@ static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *inst
        pos = va + offset;
        end = va + bs->size;
        /* truncated */
-       pa = (unsigned int)bs->dma_addr + offset;
+       pa = bs->dma_addr + offset;
        tb = instance->tile.va;
        for (i = 0; i < rows; i++) {
                for (j = 0; j < cols; j++) {
index 82e57ae983d55777463b4d7b08ac6fc18f3ec675..da6be556727bb18a458e1e59235615dc9b42c05f 100644 (file)
@@ -77,12 +77,14 @@ static bool vpu_dec_check_ap_inst(struct mtk_vcodec_dec_dev *dec_dev, struct vde
        struct mtk_vcodec_dec_ctx *ctx;
        int ret = false;
 
+       mutex_lock(&dec_dev->dev_ctx_lock);
        list_for_each_entry(ctx, &dec_dev->ctx_list, list) {
                if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
                        ret = true;
                        break;
                }
        }
+       mutex_unlock(&dec_dev->dev_ctx_lock);
 
        return ret;
 }
index 6319f24bc714b5eb3a7018f1e612afcf2dadf25e..3cb8a16222220e2d5480b48b48879112a68fc11f 100644 (file)
@@ -177,7 +177,9 @@ static int fops_vcodec_open(struct file *file)
        mtk_v4l2_venc_dbg(2, ctx, "Create instance [%d]@%p m2m_ctx=%p ",
                          ctx->id, ctx, ctx->m2m_ctx);
 
+       mutex_lock(&dev->dev_ctx_lock);
        list_add(&ctx->list, &dev->ctx_list);
+       mutex_unlock(&dev->dev_ctx_lock);
 
        mutex_unlock(&dev->dev_mutex);
        mtk_v4l2_venc_dbg(0, ctx, "%s encoder [%d]", dev_name(&dev->plat_dev->dev),
@@ -212,7 +214,9 @@ static int fops_vcodec_release(struct file *file)
        v4l2_fh_exit(&ctx->fh);
        v4l2_ctrl_handler_free(&ctx->ctrl_hdl);
 
+       mutex_lock(&dev->dev_ctx_lock);
        list_del_init(&ctx->list);
+       mutex_unlock(&dev->dev_ctx_lock);
        kfree(ctx);
        mutex_unlock(&dev->dev_mutex);
        return 0;
@@ -294,6 +298,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev)
 
        mutex_init(&dev->enc_mutex);
        mutex_init(&dev->dev_mutex);
+       mutex_init(&dev->dev_ctx_lock);
        spin_lock_init(&dev->irqlock);
 
        snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), "%s",
index a042f607ed8d1645a9dc3cf199b89e4280bc8337..0bd85d0fb379acbba3ac07c01e780cf57bef0305 100644 (file)
@@ -178,6 +178,7 @@ struct mtk_vcodec_enc_ctx {
  *
  * @enc_mutex: encoder hardware lock.
  * @dev_mutex: video_device lock
+ * @dev_ctx_lock: the lock of context list
  * @encode_workqueue: encode work queue
  *
  * @enc_irq: h264 encoder irq resource
@@ -205,6 +206,7 @@ struct mtk_vcodec_enc_dev {
        /* encoder hardware mutex lock */
        struct mutex enc_mutex;
        struct mutex dev_mutex;
+       struct mutex dev_ctx_lock;
        struct workqueue_struct *encode_workqueue;
 
        int enc_irq;
index 84ad1cc6ad171ef2ea2767653d60e6d779e5604e..51bb7ee141b9e58ac98f940f5e419d9ef4df37ca 100644 (file)
@@ -47,12 +47,14 @@ static bool vpu_enc_check_ap_inst(struct mtk_vcodec_enc_dev *enc_dev, struct ven
        struct mtk_vcodec_enc_ctx *ctx;
        int ret = false;
 
+       mutex_lock(&enc_dev->dev_ctx_lock);
        list_for_each_entry(ctx, &enc_dev->ctx_list, list) {
                if (!IS_ERR_OR_NULL(ctx) && ctx->vpu_inst == vpu) {
                        ret = true;
                        break;
                }
        }
+       mutex_unlock(&enc_dev->dev_ctx_lock);
 
        return ret;
 }
index 088f8ed4fdc4640d706a98d317e79668a6942748..a8ee0df471482393214c379169b3c7a340282296 100644 (file)
@@ -1114,10 +1114,25 @@ static void mmc_omap_set_power(struct mmc_omap_slot *slot, int power_on,
 
        host = slot->host;
 
-       if (slot->vsd)
-               gpiod_set_value(slot->vsd, power_on);
-       if (slot->vio)
-               gpiod_set_value(slot->vio, power_on);
+       if (power_on) {
+               if (slot->vsd) {
+                       gpiod_set_value(slot->vsd, power_on);
+                       msleep(1);
+               }
+               if (slot->vio) {
+                       gpiod_set_value(slot->vio, power_on);
+                       msleep(1);
+               }
+       } else {
+               if (slot->vio) {
+                       gpiod_set_value(slot->vio, power_on);
+                       msleep(50);
+               }
+               if (slot->vsd) {
+                       gpiod_set_value(slot->vsd, power_on);
+                       msleep(50);
+               }
+       }
 
        if (slot->pdata->set_power != NULL)
                slot->pdata->set_power(mmc_dev(slot->mmc), slot->id, power_on,
@@ -1254,18 +1269,18 @@ static int mmc_omap_new_slot(struct mmc_omap_host *host, int id)
        slot->pdata = &host->pdata->slots[id];
 
        /* Check for some optional GPIO controls */
-       slot->vsd = gpiod_get_index_optional(host->dev, "vsd",
-                                            id, GPIOD_OUT_LOW);
+       slot->vsd = devm_gpiod_get_index_optional(host->dev, "vsd",
+                                                 id, GPIOD_OUT_LOW);
        if (IS_ERR(slot->vsd))
                return dev_err_probe(host->dev, PTR_ERR(slot->vsd),
                                     "error looking up VSD GPIO\n");
-       slot->vio = gpiod_get_index_optional(host->dev, "vio",
-                                            id, GPIOD_OUT_LOW);
+       slot->vio = devm_gpiod_get_index_optional(host->dev, "vio",
+                                                 id, GPIOD_OUT_LOW);
        if (IS_ERR(slot->vio))
                return dev_err_probe(host->dev, PTR_ERR(slot->vio),
                                     "error looking up VIO GPIO\n");
-       slot->cover = gpiod_get_index_optional(host->dev, "cover",
-                                               id, GPIOD_IN);
+       slot->cover = devm_gpiod_get_index_optional(host->dev, "cover",
+                                                   id, GPIOD_IN);
        if (IS_ERR(slot->cover))
                return dev_err_probe(host->dev, PTR_ERR(slot->cover),
                                     "error looking up cover switch GPIO\n");
@@ -1379,13 +1394,6 @@ static int mmc_omap_probe(struct platform_device *pdev)
        if (IS_ERR(host->virt_base))
                return PTR_ERR(host->virt_base);
 
-       host->slot_switch = gpiod_get_optional(host->dev, "switch",
-                                              GPIOD_OUT_LOW);
-       if (IS_ERR(host->slot_switch))
-               return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
-                                    "error looking up slot switch GPIO\n");
-
-
        INIT_WORK(&host->slot_release_work, mmc_omap_slot_release_work);
        INIT_WORK(&host->send_stop_work, mmc_omap_send_stop_work);
 
@@ -1404,6 +1412,12 @@ static int mmc_omap_probe(struct platform_device *pdev)
        host->dev = &pdev->dev;
        platform_set_drvdata(pdev, host);
 
+       host->slot_switch = devm_gpiod_get_optional(host->dev, "switch",
+                                                   GPIOD_OUT_LOW);
+       if (IS_ERR(host->slot_switch))
+               return dev_err_probe(host->dev, PTR_ERR(host->slot_switch),
+                                    "error looking up slot switch GPIO\n");
+
        host->id = pdev->id;
        host->irq = irq;
        host->phys_base = res->start;
index 1035820c2377af7d73d401824734e949b7679c8d..8090390edaf9dbb6832c6e30c25bb0ad1068e6cc 100644 (file)
@@ -950,20 +950,173 @@ static void mt7530_setup_port5(struct dsa_switch *ds, phy_interface_t interface)
        mutex_unlock(&priv->reg_mutex);
 }
 
-/* On page 205, section "8.6.3 Frame filtering" of the active standard, IEEE Std
- * 802.1Q™-2022, it is stated that frames with 01:80:C2:00:00:00-0F as MAC DA
- * must only be propagated to C-VLAN and MAC Bridge components. That means
- * VLAN-aware and VLAN-unaware bridges. On the switch designs with CPU ports,
- * these frames are supposed to be processed by the CPU (software). So we make
- * the switch only forward them to the CPU port. And if received from a CPU
- * port, forward to a single port. The software is responsible of making the
- * switch conform to the latter by setting a single port as destination port on
- * the special tag.
+/* In Clause 5 of IEEE Std 802-2014, two sublayers of the data link layer (DLL)
+ * of the Open Systems Interconnection basic reference model (OSI/RM) are
+ * described; the medium access control (MAC) and logical link control (LLC)
+ * sublayers. The MAC sublayer is the one facing the physical layer.
  *
- * This switch intellectual property cannot conform to this part of the standard
- * fully. Whilst the REV_UN frame tag covers the remaining :04-0D and :0F MAC
- * DAs, it also includes :22-FF which the scope of propagation is not supposed
- * to be restricted for these MAC DAs.
+ * In 8.2 of IEEE Std 802.1Q-2022, the Bridge architecture is described. A
+ * Bridge component comprises a MAC Relay Entity for interconnecting the Ports
+ * of the Bridge, at least two Ports, and higher layer entities with at least a
+ * Spanning Tree Protocol Entity included.
+ *
+ * Each Bridge Port also functions as an end station and shall provide the MAC
+ * Service to an LLC Entity. Each instance of the MAC Service is provided to a
+ * distinct LLC Entity that supports protocol identification, multiplexing, and
+ * demultiplexing, for protocol data unit (PDU) transmission and reception by
+ * one or more higher layer entities.
+ *
+ * It is described in 8.13.9 of IEEE Std 802.1Q-2022 that in a Bridge, the LLC
+ * Entity associated with each Bridge Port is modeled as being directly
+ * connected to the attached Local Area Network (LAN).
+ *
+ * On the switch with CPU port architecture, CPU port functions as Management
+ * Port, and the Management Port functionality is provided by software which
+ * functions as an end station. Software is connected to an IEEE 802 LAN that is
+ * wholly contained within the system that incorporates the Bridge. Software
+ * provides access to the LLC Entity associated with each Bridge Port by the
+ * value of the source port field on the special tag on the frame received by
+ * software.
+ *
+ * We call frames that carry control information to determine the active
+ * topology and current extent of each Virtual Local Area Network (VLAN), i.e.,
+ * spanning tree or Shortest Path Bridging (SPB) and Multiple VLAN Registration
+ * Protocol Data Units (MVRPDUs), and frames from other link constrained
+ * protocols, such as Extensible Authentication Protocol over LAN (EAPOL) and
+ * Link Layer Discovery Protocol (LLDP), link-local frames. They are not
+ * forwarded by a Bridge. Permanently configured entries in the filtering
+ * database (FDB) ensure that such frames are discarded by the Forwarding
+ * Process. In 8.6.3 of IEEE Std 802.1Q-2022, this is described in detail:
+ *
+ * Each of the reserved MAC addresses specified in Table 8-1
+ * (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]) shall be
+ * permanently configured in the FDB in C-VLAN components and ERs.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-2
+ * (01-80-C2-00-00-[01,02,03,04,05,06,07,08,09,0A,0E]) shall be permanently
+ * configured in the FDB in S-VLAN components.
+ *
+ * Each of the reserved MAC addresses specified in Table 8-3
+ * (01-80-C2-00-00-[01,02,04,0E]) shall be permanently configured in the FDB in
+ * TPMR components.
+ *
+ * The FDB entries for reserved MAC addresses shall specify filtering for all
+ * Bridge Ports and all VIDs. Management shall not provide the capability to
+ * modify or remove entries for reserved MAC addresses.
+ *
+ * The addresses in Table 8-1, Table 8-2, and Table 8-3 determine the scope of
+ * propagation of PDUs within a Bridged Network, as follows:
+ *
+ *   The Nearest Bridge group address (01-80-C2-00-00-0E) is an address that no
+ *   conformant Two-Port MAC Relay (TPMR) component, Service VLAN (S-VLAN)
+ *   component, Customer VLAN (C-VLAN) component, or MAC Bridge can forward.
+ *   PDUs transmitted using this destination address, or any other addresses
+ *   that appear in Table 8-1, Table 8-2, and Table 8-3
+ *   (01-80-C2-00-00-[00,01,02,03,04,05,06,07,08,09,0A,0B,0C,0D,0E,0F]), can
+ *   therefore travel no further than those stations that can be reached via a
+ *   single individual LAN from the originating station.
+ *
+ *   The Nearest non-TPMR Bridge group address (01-80-C2-00-00-03), is an
+ *   address that no conformant S-VLAN component, C-VLAN component, or MAC
+ *   Bridge can forward; however, this address is relayed by a TPMR component.
+ *   PDUs using this destination address, or any of the other addresses that
+ *   appear in both Table 8-1 and Table 8-2 but not in Table 8-3
+ *   (01-80-C2-00-00-[00,03,05,06,07,08,09,0A,0B,0C,0D,0F]), will be relayed by
+ *   any TPMRs but will propagate no further than the nearest S-VLAN component,
+ *   C-VLAN component, or MAC Bridge.
+ *
+ *   The Nearest Customer Bridge group address (01-80-C2-00-00-00) is an address
+ *   that no conformant C-VLAN component, MAC Bridge can forward; however, it is
+ *   relayed by TPMR components and S-VLAN components. PDUs using this
+ *   destination address, or any of the other addresses that appear in Table 8-1
+ *   but not in either Table 8-2 or Table 8-3 (01-80-C2-00-00-[00,0B,0C,0D,0F]),
+ *   will be relayed by TPMR components and S-VLAN components but will propagate
+ *   no further than the nearest C-VLAN component or MAC Bridge.
+ *
+ * Because the LLC Entity associated with each Bridge Port is provided via CPU
+ * port, we must not filter these frames but forward them to CPU port.
+ *
+ * In a Bridge, the transmission Port is mainly decided by ingress and egress
+ * rules, FDB, and spanning tree Port State functions of the Forwarding Process.
+ * For link-local frames, only CPU port should be designated as destination port
+ * in the FDB, and the other functions of the Forwarding Process must not
+ * interfere with the decision of the transmission Port. We call this process
+ * trapping frames to CPU port.
+ *
+ * Therefore, on the switch with CPU port architecture, link-local frames must
+ * be trapped to CPU port, and certain link-local frames received by a Port of a
+ * Bridge comprising a TPMR component or an S-VLAN component must be excluded
+ * from it.
+ *
+ * A Bridge of the switch with CPU port architecture cannot comprise a Two-Port
+ * MAC Relay (TPMR) component as a TPMR component supports only a subset of the
+ * functionality of a MAC Bridge. A Bridge comprising two Ports (Management Port
+ * doesn't count) of this architecture will either function as a standard MAC
+ * Bridge or a standard VLAN Bridge.
+ *
+ * Therefore, a Bridge of this architecture can only comprise S-VLAN components,
+ * C-VLAN components, or MAC Bridge components. Since there's no TPMR component,
+ * we don't need to relay PDUs using the destination addresses specified on the
+ * Nearest non-TPMR section, and the portion of the Nearest Customer Bridge
+ * section where they must be relayed by TPMR components.
+ *
+ * One option to trap link-local frames to CPU port is to add static FDB entries
+ * with CPU port designated as destination port. However, because
+ * Independent VLAN Learning (IVL) is being used on every VID, each entry only
+ * applies to a single VLAN Identifier (VID). For a Bridge comprising a MAC
+ * Bridge component or a C-VLAN component, there would have to be 16 times 4096
+ * entries. This switch intellectual property can only hold a maximum of 2048
+ * entries. Using this option, there also isn't a mechanism to prevent
+ * link-local frames from being discarded when the spanning tree Port State of
+ * the reception Port is discarding.
+ *
+ * The remaining option is to utilise the BPC, RGAC1, RGAC2, RGAC3, and RGAC4
+ * registers. Whilst this applies to every VID, it doesn't contain all of the
+ * reserved MAC addresses without affecting the remaining Standard Group MAC
+ * Addresses. The REV_UN frame tag utilised using the RGAC4 register covers the
+ * remaining 01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F] destination
+ * addresses. It also includes the 01-80-C2-00-00-22 to 01-80-C2-00-00-FF
+ * destination addresses which may be relayed by MAC Bridges or VLAN Bridges.
+ * The latter option provides better but not complete conformance.
+ *
+ * This switch intellectual property also does not provide a mechanism to trap
+ * link-local frames with specific destination addresses to CPU port by Bridge,
+ * to conform to the filtering rules for the distinct Bridge components.
+ *
+ * Therefore, regardless of the type of the Bridge component, link-local frames
+ * with these destination addresses will be trapped to CPU port:
+ *
+ * 01-80-C2-00-00-[00,01,02,03,0E]
+ *
+ * In a Bridge comprising a MAC Bridge component or a C-VLAN component:
+ *
+ *   Link-local frames with these destination addresses won't be trapped to CPU
+ *   port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ *   01-80-C2-00-00-[04,05,06,07,08,09,0A,0B,0C,0D,0F]
+ *
+ * In a Bridge comprising an S-VLAN component:
+ *
+ *   Link-local frames with these destination addresses will be trapped to CPU
+ *   port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ *   01-80-C2-00-00-00
+ *
+ *   Link-local frames with these destination addresses won't be trapped to CPU
+ *   port which won't conform to IEEE Std 802.1Q-2022:
+ *
+ *   01-80-C2-00-00-[04,05,06,07,08,09,0A]
+ *
+ * To trap link-local frames to CPU port as conformant as this switch
+ * intellectual property can allow, link-local frames are made to be regarded as
+ * Bridge Protocol Data Units (BPDUs). This is because this switch intellectual
+ * property only lets the frames regarded as BPDUs bypass the spanning tree Port
+ * State function of the Forwarding Process.
+ *
+ * The only remaining interference is the ingress rules. When the reception Port
+ * has no PVID assigned on software, VLAN-untagged frames won't be allowed in.
+ * There doesn't seem to be a mechanism on the switch intellectual property to
+ * have link-local frames bypass this function of the Forwarding Process.
  */
 static void
 mt753x_trap_frames(struct mt7530_priv *priv)
@@ -971,35 +1124,43 @@ mt753x_trap_frames(struct mt7530_priv *priv)
        /* Trap 802.1X PAE frames and BPDUs to the CPU port(s) and egress them
         * VLAN-untagged.
         */
-       mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_EG_TAG_MASK |
-                  MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
-                  MT753X_BPDU_PORT_FW_MASK,
-                  MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
-                  MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_BPDU_CPU_ONLY);
+       mt7530_rmw(priv, MT753X_BPC,
+                  MT753X_PAE_BPDU_FR | MT753X_PAE_EG_TAG_MASK |
+                          MT753X_PAE_PORT_FW_MASK | MT753X_BPDU_EG_TAG_MASK |
+                          MT753X_BPDU_PORT_FW_MASK,
+                  MT753X_PAE_BPDU_FR |
+                          MT753X_PAE_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+                          MT753X_BPDU_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_BPDU_CPU_ONLY);
 
        /* Trap frames with :01 and :02 MAC DAs to the CPU port(s) and egress
         * them VLAN-untagged.
         */
-       mt7530_rmw(priv, MT753X_RGAC1, MT753X_R02_EG_TAG_MASK |
-                  MT753X_R02_PORT_FW_MASK | MT753X_R01_EG_TAG_MASK |
-                  MT753X_R01_PORT_FW_MASK,
-                  MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
-                  MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_BPDU_CPU_ONLY);
+       mt7530_rmw(priv, MT753X_RGAC1,
+                  MT753X_R02_BPDU_FR | MT753X_R02_EG_TAG_MASK |
+                          MT753X_R02_PORT_FW_MASK | MT753X_R01_BPDU_FR |
+                          MT753X_R01_EG_TAG_MASK | MT753X_R01_PORT_FW_MASK,
+                  MT753X_R02_BPDU_FR |
+                          MT753X_R02_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_R02_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+                          MT753X_R01_BPDU_FR |
+                          MT753X_R01_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_BPDU_CPU_ONLY);
 
        /* Trap frames with :03 and :0E MAC DAs to the CPU port(s) and egress
         * them VLAN-untagged.
         */
-       mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_EG_TAG_MASK |
-                  MT753X_R0E_PORT_FW_MASK | MT753X_R03_EG_TAG_MASK |
-                  MT753X_R03_PORT_FW_MASK,
-                  MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
-                  MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
-                  MT753X_BPDU_CPU_ONLY);
+       mt7530_rmw(priv, MT753X_RGAC2,
+                  MT753X_R0E_BPDU_FR | MT753X_R0E_EG_TAG_MASK |
+                          MT753X_R0E_PORT_FW_MASK | MT753X_R03_BPDU_FR |
+                          MT753X_R03_EG_TAG_MASK | MT753X_R03_PORT_FW_MASK,
+                  MT753X_R0E_BPDU_FR |
+                          MT753X_R0E_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY) |
+                          MT753X_R03_BPDU_FR |
+                          MT753X_R03_EG_TAG(MT7530_VLAN_EG_UNTAGGED) |
+                          MT753X_BPDU_CPU_ONLY);
 }
 
 static void
@@ -1722,14 +1883,16 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
 
 static int mt753x_mirror_port_get(unsigned int id, u32 val)
 {
-       return (id == ID_MT7531) ? MT7531_MIRROR_PORT_GET(val) :
-                                  MIRROR_PORT(val);
+       return (id == ID_MT7531 || id == ID_MT7988) ?
+                      MT7531_MIRROR_PORT_GET(val) :
+                      MIRROR_PORT(val);
 }
 
 static int mt753x_mirror_port_set(unsigned int id, u32 val)
 {
-       return (id == ID_MT7531) ? MT7531_MIRROR_PORT_SET(val) :
-                                  MIRROR_PORT(val);
+       return (id == ID_MT7531 || id == ID_MT7988) ?
+                      MT7531_MIRROR_PORT_SET(val) :
+                      MIRROR_PORT(val);
 }
 
 static int mt753x_port_mirror_add(struct dsa_switch *ds, int port,
@@ -2319,6 +2482,9 @@ mt7530_setup(struct dsa_switch *ds)
                           PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
        }
 
+       /* Allow mirroring frames received on the local port (monitor port). */
+       mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
        /* Setup VLAN ID 0 for VLAN-unaware bridges */
        ret = mt7530_setup_vlan0(priv);
        if (ret)
@@ -2430,6 +2596,9 @@ mt7531_setup_common(struct dsa_switch *ds)
                           PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT));
        }
 
+       /* Allow mirroring frames received on the local port (monitor port). */
+       mt7530_set(priv, MT753X_AGC, LOCAL_EN);
+
        /* Flush the FDB table */
        ret = mt7530_fdb_cmd(priv, MT7530_FDB_FLUSH, NULL);
        if (ret < 0)
@@ -2505,18 +2674,25 @@ mt7531_setup(struct dsa_switch *ds)
        mt7530_rmw(priv, MT7531_GPIO_MODE0, MT7531_GPIO0_MASK,
                   MT7531_GPIO0_INTERRUPT);
 
-       /* Enable PHY core PLL, since phy_device has not yet been created
-        * provided for phy_[read,write]_mmd_indirect is called, we provide
-        * our own mt7531_ind_mmd_phy_[read,write] to complete this
-        * function.
+       /* Enable Energy-Efficient Ethernet (EEE) and PHY core PLL, since
+        * phy_device has not yet been created provided for
+        * phy_[read,write]_mmd_indirect is called, we provide our own
+        * mt7531_ind_mmd_phy_[read,write] to complete this function.
         */
        val = mt7531_ind_c45_phy_read(priv, MT753X_CTRL_PHY_ADDR,
                                      MDIO_MMD_VEND2, CORE_PLL_GROUP4);
-       val |= MT7531_PHY_PLL_BYPASS_MODE;
+       val |= MT7531_RG_SYSPLL_DMY2 | MT7531_PHY_PLL_BYPASS_MODE;
        val &= ~MT7531_PHY_PLL_OFF;
        mt7531_ind_c45_phy_write(priv, MT753X_CTRL_PHY_ADDR, MDIO_MMD_VEND2,
                                 CORE_PLL_GROUP4, val);
 
+       /* Disable EEE advertisement on the switch PHYs. */
+       for (i = MT753X_CTRL_PHY_ADDR;
+            i < MT753X_CTRL_PHY_ADDR + MT7530_NUM_PHYS; i++) {
+               mt7531_ind_c45_phy_write(priv, i, MDIO_MMD_AN, MDIO_AN_EEE_ADV,
+                                        0);
+       }
+
        mt7531_setup_common(ds);
 
        /* Setup VLAN ID 0 for VLAN-unaware bridges */
index d17b318e6ee4882ed8b6f6668eaa57a99a38d184..a08053390b285e3f27d0bc08a3acba3effd17007 100644 (file)
@@ -32,6 +32,10 @@ enum mt753x_id {
 #define SYSC_REG_RSTCTRL               0x34
 #define  RESET_MCM                     BIT(2)
 
+/* Register for ARL global control */
+#define MT753X_AGC                     0xc
+#define  LOCAL_EN                      BIT(7)
+
 /* Registers to mac forward control for unknown frames */
 #define MT7530_MFC                     0x10
 #define  BC_FFP(x)                     (((x) & 0xff) << 24)
@@ -65,6 +69,7 @@ enum mt753x_id {
 
 /* Registers for BPDU and PAE frame control*/
 #define MT753X_BPC                     0x24
+#define  MT753X_PAE_BPDU_FR            BIT(25)
 #define  MT753X_PAE_EG_TAG_MASK                GENMASK(24, 22)
 #define  MT753X_PAE_EG_TAG(x)          FIELD_PREP(MT753X_PAE_EG_TAG_MASK, x)
 #define  MT753X_PAE_PORT_FW_MASK       GENMASK(18, 16)
@@ -75,20 +80,24 @@ enum mt753x_id {
 
 /* Register for :01 and :02 MAC DA frame control */
 #define MT753X_RGAC1                   0x28
+#define  MT753X_R02_BPDU_FR            BIT(25)
 #define  MT753X_R02_EG_TAG_MASK                GENMASK(24, 22)
 #define  MT753X_R02_EG_TAG(x)          FIELD_PREP(MT753X_R02_EG_TAG_MASK, x)
 #define  MT753X_R02_PORT_FW_MASK       GENMASK(18, 16)
 #define  MT753X_R02_PORT_FW(x)         FIELD_PREP(MT753X_R02_PORT_FW_MASK, x)
+#define  MT753X_R01_BPDU_FR            BIT(9)
 #define  MT753X_R01_EG_TAG_MASK                GENMASK(8, 6)
 #define  MT753X_R01_EG_TAG(x)          FIELD_PREP(MT753X_R01_EG_TAG_MASK, x)
 #define  MT753X_R01_PORT_FW_MASK       GENMASK(2, 0)
 
 /* Register for :03 and :0E MAC DA frame control */
 #define MT753X_RGAC2                   0x2c
+#define  MT753X_R0E_BPDU_FR            BIT(25)
 #define  MT753X_R0E_EG_TAG_MASK                GENMASK(24, 22)
 #define  MT753X_R0E_EG_TAG(x)          FIELD_PREP(MT753X_R0E_EG_TAG_MASK, x)
 #define  MT753X_R0E_PORT_FW_MASK       GENMASK(18, 16)
 #define  MT753X_R0E_PORT_FW(x)         FIELD_PREP(MT753X_R0E_PORT_FW_MASK, x)
+#define  MT753X_R03_BPDU_FR            BIT(9)
 #define  MT753X_R03_EG_TAG_MASK                GENMASK(8, 6)
 #define  MT753X_R03_EG_TAG(x)          FIELD_PREP(MT753X_R03_EG_TAG_MASK, x)
 #define  MT753X_R03_PORT_FW_MASK       GENMASK(2, 0)
@@ -616,6 +625,7 @@ enum mt7531_clk_skew {
 #define  RG_SYSPLL_DDSFBK_EN           BIT(12)
 #define  RG_SYSPLL_BIAS_EN             BIT(11)
 #define  RG_SYSPLL_BIAS_LPF_EN         BIT(10)
+#define  MT7531_RG_SYSPLL_DMY2         BIT(6)
 #define  MT7531_PHY_PLL_OFF            BIT(5)
 #define  MT7531_PHY_PLL_BYPASS_MODE    BIT(4)
 
index 9e9e4a03f1a8c9bd4c8d68cb20340157c4547e80..2d8a66ea82fab7f0a023ab469ccc33321d0b4ba3 100644 (file)
@@ -351,7 +351,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
                        ENA_COM_BOUNCE_BUFFER_CNTRL_CNT;
                io_sq->bounce_buf_ctrl.next_to_use = 0;
 
-               size = io_sq->bounce_buf_ctrl.buffer_size *
+               size = (size_t)io_sq->bounce_buf_ctrl.buffer_size *
                        io_sq->bounce_buf_ctrl.buffers_num;
 
                dev_node = dev_to_node(ena_dev->dmadev);
index 09e7da1a69c9f0c8141e03be445c2589e9d3a999..be5acfa41ee0ce4d80605e0bcdc6dc743c421f42 100644 (file)
@@ -718,8 +718,11 @@ void ena_unmap_tx_buff(struct ena_ring *tx_ring,
 static void ena_free_tx_bufs(struct ena_ring *tx_ring)
 {
        bool print_once = true;
+       bool is_xdp_ring;
        u32 i;
 
+       is_xdp_ring = ENA_IS_XDP_INDEX(tx_ring->adapter, tx_ring->qid);
+
        for (i = 0; i < tx_ring->ring_size; i++) {
                struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i];
 
@@ -739,10 +742,15 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
 
                ena_unmap_tx_buff(tx_ring, tx_info);
 
-               dev_kfree_skb_any(tx_info->skb);
+               if (is_xdp_ring)
+                       xdp_return_frame(tx_info->xdpf);
+               else
+                       dev_kfree_skb_any(tx_info->skb);
        }
-       netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
-                                                 tx_ring->qid));
+
+       if (!is_xdp_ring)
+               netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
+                                                         tx_ring->qid));
 }
 
 static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
@@ -3481,10 +3489,11 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
 {
        struct ena_ring *tx_ring;
        struct ena_ring *rx_ring;
-       int i, budget, rc;
+       int qid, budget, rc;
        int io_queue_count;
 
        io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
+
        /* Make sure the driver doesn't turn the device in other process */
        smp_rmb();
 
@@ -3497,27 +3506,29 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
        if (adapter->missing_tx_completion_to == ENA_HW_HINTS_NO_TIMEOUT)
                return;
 
-       budget = ENA_MONITORED_TX_QUEUES;
+       budget = min_t(u32, io_queue_count, ENA_MONITORED_TX_QUEUES);
 
-       for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
-               tx_ring = &adapter->tx_ring[i];
-               rx_ring = &adapter->rx_ring[i];
+       qid = adapter->last_monitored_tx_qid;
+
+       while (budget) {
+               qid = (qid + 1) % io_queue_count;
+
+               tx_ring = &adapter->tx_ring[qid];
+               rx_ring = &adapter->rx_ring[qid];
 
                rc = check_missing_comp_in_tx_queue(adapter, tx_ring);
                if (unlikely(rc))
                        return;
 
-               rc =  !ENA_IS_XDP_INDEX(adapter, i) ?
+               rc =  !ENA_IS_XDP_INDEX(adapter, qid) ?
                        check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
                if (unlikely(rc))
                        return;
 
                budget--;
-               if (!budget)
-                       break;
        }
 
-       adapter->last_monitored_tx_qid = i % io_queue_count;
+       adapter->last_monitored_tx_qid = qid;
 }
 
 /* trigger napi schedule after 2 consecutive detections */
index 337c435d3ce998b1b8f69a86f8be7997e1ff99c8..5b175e7e92a10ba19917b9c5e63d89bc1f2a8dd5 100644 (file)
@@ -89,7 +89,7 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
 
        rc = ena_xdp_tx_map_frame(tx_ring, tx_info, xdpf, &ena_tx_ctx);
        if (unlikely(rc))
-               return rc;
+               goto err;
 
        ena_tx_ctx.req_id = req_id;
 
@@ -112,7 +112,9 @@ int ena_xdp_xmit_frame(struct ena_ring *tx_ring,
 
 error_unmap_dma:
        ena_unmap_tx_buff(tx_ring, tx_info);
+err:
        tx_info->xdpf = NULL;
+
        return rc;
 }
 
index 9662ee72814c0c64fab3ca08db99e888faa51124..536635e5772799e17ef31857c655735c5ef88865 100644 (file)
@@ -593,6 +593,16 @@ err_out:
        pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
+void pdsc_pci_reset_thread(struct work_struct *work)
+{
+       struct pdsc *pdsc = container_of(work, struct pdsc, pci_reset_work);
+       struct pci_dev *pdev = pdsc->pdev;
+
+       pci_dev_get(pdev);
+       pci_reset_function(pdev);
+       pci_dev_put(pdev);
+}
+
 static void pdsc_check_pci_health(struct pdsc *pdsc)
 {
        u8 fw_status;
@@ -607,7 +617,8 @@ static void pdsc_check_pci_health(struct pdsc *pdsc)
        if (fw_status != PDS_RC_BAD_PCI)
                return;
 
-       pci_reset_function(pdsc->pdev);
+       /* prevent deadlock between pdsc_reset_prepare and pdsc_health_thread */
+       queue_work(pdsc->wq, &pdsc->pci_reset_work);
 }
 
 void pdsc_health_thread(struct work_struct *work)
index 92d7657dd6147e7770b7d72f8ee25deb303370b9..a3e17a0c187a6a4a5eaab7207f7089b2172b8e0e 100644 (file)
@@ -197,6 +197,7 @@ struct pdsc {
        struct pdsc_qcq notifyqcq;
        u64 last_eid;
        struct pdsc_viftype *viftype_status;
+       struct work_struct pci_reset_work;
 };
 
 /** enum pds_core_dbell_bits - bitwise composition of dbell values.
@@ -313,5 +314,6 @@ int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
 
 void pdsc_fw_down(struct pdsc *pdsc);
 void pdsc_fw_up(struct pdsc *pdsc);
+void pdsc_pci_reset_thread(struct work_struct *work);
 
 #endif /* _PDSC_H_ */
index e494e1298dc9a36c175a6b86623450c0da55ad7b..495ef4ef8c103d6fcacd8b155dbc09e42d68345c 100644 (file)
@@ -229,6 +229,9 @@ int pdsc_devcmd_reset(struct pdsc *pdsc)
                .reset.opcode = PDS_CORE_CMD_RESET,
        };
 
+       if (!pdsc_is_fw_running(pdsc))
+               return 0;
+
        return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
 }
 
index ab6133e7db422d3579291e3476efb48e5e0be06c..660268ff95623fbe9c86ee2078913c85a3579a5a 100644 (file)
@@ -239,6 +239,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
        snprintf(wq_name, sizeof(wq_name), "%s.%d", PDS_CORE_DRV_NAME, pdsc->uid);
        pdsc->wq = create_singlethread_workqueue(wq_name);
        INIT_WORK(&pdsc->health_work, pdsc_health_thread);
+       INIT_WORK(&pdsc->pci_reset_work, pdsc_pci_reset_thread);
        timer_setup(&pdsc->wdtimer, pdsc_wdtimer_cb, 0);
        pdsc->wdtimer_period = PDSC_WATCHDOG_SECS * HZ;
 
index 493b724848c8f44122abd1b1fb340e97abc30a09..57e61f9631678edf31a2ff237fe0301254a396e5 100644 (file)
@@ -11758,6 +11758,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        /* VF-reps may need to be re-opened after the PF is re-opened */
        if (BNXT_PF(bp))
                bnxt_vf_reps_open(bp);
+       if (bp->ptp_cfg)
+               atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS);
        bnxt_ptp_init_rtc(bp, true);
        bnxt_ptp_cfg_tstamp_filters(bp);
        bnxt_cfg_usr_fltrs(bp);
index 93f9bd55020f277f02fb6ed959bd5e82ec35fd93..195c02dc0683054e03680abff45f3f42d9605192 100644 (file)
@@ -210,6 +210,9 @@ void bnxt_ulp_start(struct bnxt *bp, int err)
        if (err)
                return;
 
+       if (edev->ulp_tbl->msix_requested)
+               bnxt_fill_msix_vecs(bp, edev->msix_entries);
+
        if (aux_priv) {
                struct auxiliary_device *adev;
 
@@ -392,12 +395,13 @@ void bnxt_rdma_aux_device_init(struct bnxt *bp)
        if (!edev)
                goto aux_dev_uninit;
 
+       aux_priv->edev = edev;
+
        ulp = kzalloc(sizeof(*ulp), GFP_KERNEL);
        if (!ulp)
                goto aux_dev_uninit;
 
        edev->ulp_tbl = ulp;
-       aux_priv->edev = edev;
        bp->edev = edev;
        bnxt_set_edev_info(edev, bp);
 
index b890410a2bc0bacd27eab5acd6f87cd8b0110939..688ccb0615ab9f87e7caf9e6fa522444613b2bf3 100644 (file)
@@ -28,6 +28,8 @@ ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers,
         * - ICE_TC_FLWR_FIELD_VLAN_TPID (present if specified)
         * - Tunnel flag (present if tunnel)
         */
+       if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS)
+               lkups_cnt++;
 
        if (flags & ICE_TC_FLWR_FIELD_TENANT_ID)
                lkups_cnt++;
@@ -363,6 +365,11 @@ ice_tc_fill_rules(struct ice_hw *hw, u32 flags,
        /* Always add direction metadata */
        ice_rule_add_direction_metadata(&list[ICE_TC_METADATA_LKUP_IDX]);
 
+       if (tc_fltr->direction == ICE_ESWITCH_FLTR_EGRESS) {
+               ice_rule_add_src_vsi_metadata(&list[i]);
+               i++;
+       }
+
        rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type);
        if (tc_fltr->tunnel_type != TNL_LAST) {
                i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list, i);
@@ -772,7 +779,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
        int ret;
        int i;
 
-       if (!flags || (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) {
+       if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT) {
                NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)");
                return -EOPNOTSUPP;
        }
@@ -820,6 +827,7 @@ ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr)
 
        /* specify the cookie as filter_rule_id */
        rule_info.fltr_rule_id = fltr->cookie;
+       rule_info.src_vsi = vsi->idx;
 
        ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
        if (ret == -EEXIST) {
@@ -1481,7 +1489,10 @@ ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi,
                  (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
                   BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
                   BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
-                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
+                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
+                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
+                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+                  BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
                NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel");
                return -EOPNOTSUPP;
        } else {
index d39001cdc707ee3f72e80a9be503b438e8ff9eca..00af8888e3291a061e547fe4824df04005276302 100644 (file)
@@ -4819,18 +4819,18 @@ static int rvu_nix_block_init(struct rvu *rvu, struct nix_hw *nix_hw)
                 */
                rvu_write64(rvu, blkaddr, NIX_AF_CFG,
                            rvu_read64(rvu, blkaddr, NIX_AF_CFG) | 0x40ULL);
+       }
 
-               /* Set chan/link to backpressure TL3 instead of TL2 */
-               rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
+       /* Set chan/link to backpressure TL3 instead of TL2 */
+       rvu_write64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL, 0x01);
 
-               /* Disable SQ manager's sticky mode operation (set TM6 = 0)
-                * This sticky mode is known to cause SQ stalls when multiple
-                * SQs are mapped to same SMQ and transmitting pkts at a time.
-                */
-               cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
-               cfg &= ~BIT_ULL(15);
-               rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
-       }
+       /* Disable SQ manager's sticky mode operation (set TM6 = 0)
+        * This sticky mode is known to cause SQ stalls when multiple
+        * SQs are mapped to same SMQ and transmitting pkts at a time.
+        */
+       cfg = rvu_read64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS);
+       cfg &= ~BIT_ULL(15);
+       rvu_write64(rvu, blkaddr, NIX_AF_SQM_DBG_CTL_STATUS, cfg);
 
        ltdefs = rvu->kpu.lt_def;
        /* Calibrate X2P bus to check if CGX/LBK links are fine */
index 87bdb93cb066e9afba84e5a93556c1efa0d780d9..f4655a8c0705d70b3a4aff580ccb1e437069ca64 100644 (file)
@@ -689,6 +689,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
 
        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_match_control match;
+               u32 val;
 
                flow_rule_match_control(rule, &match);
                if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
@@ -697,12 +698,14 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
                }
 
                if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
+                       val = match.key->flags & FLOW_DIS_IS_FRAGMENT;
                        if (ntohs(flow_spec->etype) == ETH_P_IP) {
-                               flow_spec->ip_flag = IPV4_FLAG_MORE;
+                               flow_spec->ip_flag = val ? IPV4_FLAG_MORE : 0;
                                flow_mask->ip_flag = IPV4_FLAG_MORE;
                                req->features |= BIT_ULL(NPC_IPFRAG_IPV4);
                        } else if (ntohs(flow_spec->etype) == ETH_P_IPV6) {
-                               flow_spec->next_header = IPPROTO_FRAGMENT;
+                               flow_spec->next_header = val ?
+                                                        IPPROTO_FRAGMENT : 0;
                                flow_mask->next_header = 0xff;
                                req->features |= BIT_ULL(NPC_IPFRAG_IPV6);
                        } else {
index 1e77bbf5d22a1a193602b199c90d205e219595b0..1723e9912ae07ca8c58bfed17e959ab4e0eb4fe5 100644 (file)
@@ -382,6 +382,7 @@ static void otx2_qos_read_txschq_cfg_tl(struct otx2_qos_node *parent,
                otx2_qos_read_txschq_cfg_tl(node, cfg);
                cnt = cfg->static_node_pos[node->level];
                cfg->schq_contig_list[node->level][cnt] = node->schq;
+               cfg->schq_index_used[node->level][cnt] = true;
                cfg->schq_contig[node->level]++;
                cfg->static_node_pos[node->level]++;
                otx2_qos_read_txschq_cfg_schq(node, cfg);
index c895e265ae0ebcde930acf3785ba9ab1b63b65e5..61334a71058c7594a61ca768ce041f92ab238d24 100644 (file)
@@ -1074,13 +1074,13 @@ mtk_wed_dma_disable(struct mtk_wed_device *dev)
 static void
 mtk_wed_stop(struct mtk_wed_device *dev)
 {
+       mtk_wed_dma_disable(dev);
        mtk_wed_set_ext_int(dev, false);
 
        wed_w32(dev, MTK_WED_WPDMA_INT_TRIGGER, 0);
        wed_w32(dev, MTK_WED_WDMA_INT_TRIGGER, 0);
        wdma_w32(dev, MTK_WDMA_INT_MASK, 0);
        wdma_w32(dev, MTK_WDMA_INT_GRP2, 0);
-       wed_w32(dev, MTK_WED_WPDMA_INT_MASK, 0);
 
        if (!mtk_wed_get_rx_capa(dev))
                return;
@@ -1093,7 +1093,6 @@ static void
 mtk_wed_deinit(struct mtk_wed_device *dev)
 {
        mtk_wed_stop(dev);
-       mtk_wed_dma_disable(dev);
 
        wed_clr(dev, MTK_WED_CTRL,
                MTK_WED_CTRL_WDMA_INT_AGENT_EN |
@@ -2605,9 +2604,6 @@ mtk_wed_irq_get(struct mtk_wed_device *dev, u32 mask)
 static void
 mtk_wed_irq_set_mask(struct mtk_wed_device *dev, u32 mask)
 {
-       if (!dev->running)
-               return;
-
        mtk_wed_set_ext_int(dev, !!mask);
        wed_w32(dev, MTK_WED_INT_MASK, mask);
 }
index 86f1854698b4e80816b1b55e1d4f4d31fdf0737f..883c044852f1df39852b50b50a80ab31c7bfb091 100644 (file)
@@ -95,9 +95,15 @@ static inline void mlx5e_ptp_metadata_fifo_push(struct mlx5e_ptp_metadata_fifo *
 }
 
 static inline u8
+mlx5e_ptp_metadata_fifo_peek(struct mlx5e_ptp_metadata_fifo *fifo)
+{
+       return fifo->data[fifo->mask & fifo->cc];
+}
+
+static inline void
 mlx5e_ptp_metadata_fifo_pop(struct mlx5e_ptp_metadata_fifo *fifo)
 {
-       return fifo->data[fifo->mask & fifo->cc++];
+       fifo->cc++;
 }
 
 static inline void
index e87e26f2c669c2e39f59a9f656e643fce2b48aae..6743806b8480602a8d0a3d02cddcf2d2c1c82199 100644 (file)
@@ -83,24 +83,25 @@ int mlx5e_open_qos_sq(struct mlx5e_priv *priv, struct mlx5e_channels *chs,
 
        txq_ix = mlx5e_qid_from_qos(chs, node_qid);
 
-       WARN_ON(node_qid > priv->htb_max_qos_sqs);
-       if (node_qid == priv->htb_max_qos_sqs) {
-               struct mlx5e_sq_stats *stats, **stats_list = NULL;
-
-               if (priv->htb_max_qos_sqs == 0) {
-                       stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
-                                             sizeof(*stats_list),
-                                             GFP_KERNEL);
-                       if (!stats_list)
-                               return -ENOMEM;
-               }
+       WARN_ON(node_qid >= mlx5e_htb_cur_leaf_nodes(priv->htb));
+       if (!priv->htb_qos_sq_stats) {
+               struct mlx5e_sq_stats **stats_list;
+
+               stats_list = kvcalloc(mlx5e_qos_max_leaf_nodes(priv->mdev),
+                                     sizeof(*stats_list), GFP_KERNEL);
+               if (!stats_list)
+                       return -ENOMEM;
+
+               WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+       }
+
+       if (!priv->htb_qos_sq_stats[node_qid]) {
+               struct mlx5e_sq_stats *stats;
+
                stats = kzalloc(sizeof(*stats), GFP_KERNEL);
-               if (!stats) {
-                       kvfree(stats_list);
+               if (!stats)
                        return -ENOMEM;
-               }
-               if (stats_list)
-                       WRITE_ONCE(priv->htb_qos_sq_stats, stats_list);
+
                WRITE_ONCE(priv->htb_qos_sq_stats[node_qid], stats);
                /* Order htb_max_qos_sqs increment after writing the array pointer.
                 * Pairs with smp_load_acquire in en_stats.c.
index 0ab9db319530258fab6c7e6ad5648e13550069f8..22918b2ef7f128849be838063819ed12509abb45 100644 (file)
@@ -108,7 +108,10 @@ static int mlx5e_tx_reporter_err_cqe_recover(void *ctx)
        mlx5e_reset_txqsq_cc_pc(sq);
        sq->stats->recover++;
        clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+       rtnl_lock();
        mlx5e_activate_txqsq(sq);
+       rtnl_unlock();
+
        if (sq->channel)
                mlx5e_trigger_napi_icosq(sq->channel);
        else
@@ -179,12 +182,16 @@ static int mlx5e_tx_reporter_ptpsq_unhealthy_recover(void *ctx)
        carrier_ok = netif_carrier_ok(netdev);
        netif_carrier_off(netdev);
 
+       rtnl_lock();
        mlx5e_deactivate_priv_channels(priv);
+       rtnl_unlock();
 
        mlx5e_ptp_close(chs->ptp);
        err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
 
+       rtnl_lock();
        mlx5e_activate_priv_channels(priv);
+       rtnl_unlock();
 
        /* return carrier back if needed */
        if (carrier_ok)
index bcafb4bf94154ff01969fc851852d4234f5b0c04..8d9a3b5ec973b39aaa1addc8f6c5e3a568a7ab1a 100644 (file)
@@ -179,6 +179,13 @@ u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels)
        return min_t(u32, rqt_size, max_cap_rqt_size);
 }
 
+#define MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH 256
+
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void)
+{
+       return MLX5E_MAX_RQT_SIZE_ALLOWED_WITH_XOR8_HASH / MLX5E_UNIFORM_SPREAD_RQT_FACTOR;
+}
+
 void mlx5e_rqt_destroy(struct mlx5e_rqt *rqt)
 {
        mlx5_core_destroy_rqt(rqt->mdev, rqt->rqtn);
index e0bc30308c77000038d151a7caa206c516a6fe9a..2f9e04a8418f143fbf3d01423721d85b2d5b5a2a 100644 (file)
@@ -38,6 +38,7 @@ static inline u32 mlx5e_rqt_get_rqtn(struct mlx5e_rqt *rqt)
 }
 
 u32 mlx5e_rqt_size(struct mlx5_core_dev *mdev, unsigned int num_channels);
+unsigned int mlx5e_rqt_max_num_channels_allowed_for_xor8(void);
 int mlx5e_rqt_redirect_direct(struct mlx5e_rqt *rqt, u32 rqn, u32 *vhca_id);
 int mlx5e_rqt_redirect_indir(struct mlx5e_rqt *rqt, u32 *rqns, u32 *vhca_ids,
                             unsigned int num_rqns,
index f675b1926340f9ca4218aa47febac7c5139ab0e9..f66bbc8464645efabc08ebf923fada5e1f79c5fe 100644 (file)
@@ -57,6 +57,7 @@ int mlx5e_selq_init(struct mlx5e_selq *selq, struct mutex *state_lock)
 
 void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
 {
+       mutex_lock(selq->state_lock);
        WARN_ON_ONCE(selq->is_prepared);
 
        kvfree(selq->standby);
@@ -67,6 +68,7 @@ void mlx5e_selq_cleanup(struct mlx5e_selq *selq)
 
        kvfree(selq->standby);
        selq->standby = NULL;
+       mutex_unlock(selq->state_lock);
 }
 
 void mlx5e_selq_prepare_params(struct mlx5e_selq *selq, struct mlx5e_params *params)
index c7f542d0b8f08c635a6fad868a364a8f5f91ba8c..93cf23278d93c2629977f38ee7a39e7cd6c0aaa6 100644 (file)
@@ -46,6 +46,10 @@ struct arfs_table {
        struct hlist_head        rules_hash[ARFS_HASH_SIZE];
 };
 
+enum {
+       MLX5E_ARFS_STATE_ENABLED,
+};
+
 enum arfs_type {
        ARFS_IPV4_TCP,
        ARFS_IPV6_TCP,
@@ -60,6 +64,7 @@ struct mlx5e_arfs_tables {
        spinlock_t                     arfs_lock;
        int                            last_filter_id;
        struct workqueue_struct        *wq;
+       unsigned long                  state;
 };
 
 struct arfs_tuple {
@@ -170,6 +175,8 @@ int mlx5e_arfs_enable(struct mlx5e_flow_steering *fs)
                        return err;
                }
        }
+       set_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
        return 0;
 }
 
@@ -455,6 +462,8 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs)
        int i;
        int j;
 
+       clear_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state);
+
        spin_lock_bh(&arfs->arfs_lock);
        mlx5e_for_each_arfs_rule(rule, htmp, arfs->arfs_tables, i, j) {
                hlist_del_init(&rule->hlist);
@@ -627,17 +636,8 @@ static void arfs_handle_work(struct work_struct *work)
        struct mlx5_flow_handle *rule;
 
        arfs = mlx5e_fs_get_arfs(priv->fs);
-       mutex_lock(&priv->state_lock);
-       if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
-               spin_lock_bh(&arfs->arfs_lock);
-               hlist_del(&arfs_rule->hlist);
-               spin_unlock_bh(&arfs->arfs_lock);
-
-               mutex_unlock(&priv->state_lock);
-               kfree(arfs_rule);
-               goto out;
-       }
-       mutex_unlock(&priv->state_lock);
+       if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state))
+               return;
 
        if (!arfs_rule->rule) {
                rule = arfs_add_rule(priv, arfs_rule);
@@ -753,6 +753,11 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
                return -EPROTONOSUPPORT;
 
        spin_lock_bh(&arfs->arfs_lock);
+       if (!test_bit(MLX5E_ARFS_STATE_ENABLED, &arfs->state)) {
+               spin_unlock_bh(&arfs->arfs_lock);
+               return -EPERM;
+       }
+
        arfs_rule = arfs_find_rule(arfs_t, &fk);
        if (arfs_rule) {
                if (arfs_rule->rxq == rxq_index || work_busy(&arfs_rule->arfs_work)) {
index cc51ce16df14abe530910e063b9072c9e23ff49c..67a29826bb5702b8fd5e81e8673da5b6291bf7f3 100644 (file)
@@ -451,6 +451,34 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 
        mutex_lock(&priv->state_lock);
 
+       if (mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc == ETH_RSS_HASH_XOR) {
+               unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+               if (count > xor8_max_channels) {
+                       err = -EINVAL;
+                       netdev_err(priv->netdev, "%s: Requested number of channels (%d) exceeds the maximum allowed by the XOR8 RSS hfunc (%d)\n",
+                                  __func__, count, xor8_max_channels);
+                       goto out;
+               }
+       }
+
+       /* If RXFH is configured, changing the channels number is allowed only if
+        * it does not require resizing the RSS table. This is because the previous
+        * configuration may no longer be compatible with the new RSS table.
+        */
+       if (netif_is_rxfh_configured(priv->netdev)) {
+               int cur_rqt_size = mlx5e_rqt_size(priv->mdev, cur_params->num_channels);
+               int new_rqt_size = mlx5e_rqt_size(priv->mdev, count);
+
+               if (new_rqt_size != cur_rqt_size) {
+                       err = -EINVAL;
+                       netdev_err(priv->netdev,
+                                  "%s: RXFH is configured, block changing channels number that affects RSS table size (new: %d, current: %d)\n",
+                                  __func__, new_rqt_size, cur_rqt_size);
+                       goto out;
+               }
+       }
+
        /* Don't allow changing the number of channels if HTB offload is active,
         * because the numeration of the QoS SQs will change, while per-queue
         * qdiscs are attached.
@@ -561,12 +589,12 @@ static int mlx5e_get_coalesce(struct net_device *netdev,
 static void
 mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
 {
-       struct mlx5_core_dev *mdev = priv->mdev;
        int tc;
        int i;
 
        for (i = 0; i < priv->channels.num; ++i) {
                struct mlx5e_channel *c = priv->channels.c[i];
+               struct mlx5_core_dev *mdev = c->mdev;
 
                for (tc = 0; tc < c->num_tc; tc++) {
                        mlx5_core_modify_cq_moderation(mdev,
@@ -580,11 +608,11 @@ mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coal
 static void
 mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal)
 {
-       struct mlx5_core_dev *mdev = priv->mdev;
        int i;
 
        for (i = 0; i < priv->channels.num; ++i) {
                struct mlx5e_channel *c = priv->channels.c[i];
+               struct mlx5_core_dev *mdev = c->mdev;
 
                mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq,
                                               coal->rx_coalesce_usecs,
@@ -1281,17 +1309,30 @@ int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh,
        struct mlx5e_priv *priv = netdev_priv(dev);
        u32 *rss_context = &rxfh->rss_context;
        u8 hfunc = rxfh->hfunc;
+       unsigned int count;
        int err;
 
        mutex_lock(&priv->state_lock);
+
+       count = priv->channels.params.num_channels;
+
+       if (hfunc == ETH_RSS_HASH_XOR) {
+               unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8();
+
+               if (count > xor8_max_channels) {
+                       err = -EINVAL;
+                       netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n",
+                                  __func__, count, xor8_max_channels);
+                       goto unlock;
+               }
+       }
+
        if (*rss_context && rxfh->rss_delete) {
                err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context);
                goto unlock;
        }
 
        if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) {
-               unsigned int count = priv->channels.params.num_channels;
-
                err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count);
                if (err)
                        goto unlock;
index 91848eae45655fd57d7dcc3365f8ad5094f61f3d..319930c04093ba2d15d498006ad9b3d060a883b7 100644 (file)
@@ -209,8 +209,8 @@ static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data)
                                                      *data,
                                                      mlx5e_devcom_event_mpv,
                                                      priv);
-       if (IS_ERR_OR_NULL(priv->devcom))
-               return -EOPNOTSUPP;
+       if (IS_ERR(priv->devcom))
+               return PTR_ERR(priv->devcom);
 
        if (mlx5_core_is_mp_master(priv->mdev)) {
                mlx5_devcom_send_event(priv->devcom, MPV_DEVCOM_MASTER_UP,
@@ -5726,9 +5726,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
        kfree(priv->tx_rates);
        kfree(priv->txq2sq);
        destroy_workqueue(priv->wq);
-       mutex_lock(&priv->state_lock);
        mlx5e_selq_cleanup(&priv->selq);
-       mutex_unlock(&priv->state_lock);
        free_cpumask_var(priv->scratchpad.cpumask);
 
        for (i = 0; i < priv->htb_max_qos_sqs; i++)
index 2fa076b23fbead06bceb6697e0ebb0238bb5be7e..e21a3b4128ce880478795b023e1ff314e9336dd0 100644 (file)
@@ -398,6 +398,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                     (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) {
                u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
 
+               mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist);
+
                mlx5e_skb_cb_hwtstamp_init(skb);
                mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb,
                                           metadata_index);
@@ -496,9 +498,6 @@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 
 err_drop:
        stats->dropped++;
-       if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
-               mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist,
-                                            be32_to_cpu(eseg->flow_table_metadata));
        dev_kfree_skb_any(skb);
        mlx5e_tx_flush(sq);
 }
@@ -657,7 +656,7 @@ static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb,
 {
        if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
                eseg->flow_table_metadata =
-                       cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist));
+                       cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist));
 }
 
 static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq,
index 3047d7015c5256726338904432ce56845c59c39c..1789800faaeb62841387ed69b0a82aab3283bf46 100644 (file)
@@ -1868,6 +1868,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
        if (err)
                goto abort;
 
+       dev->priv.eswitch = esw;
        err = esw_offloads_init(esw);
        if (err)
                goto reps_err;
@@ -1892,11 +1893,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
                esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_BASIC;
        else
                esw->offloads.encap = DEVLINK_ESWITCH_ENCAP_MODE_NONE;
-       if (MLX5_ESWITCH_MANAGER(dev) &&
-           mlx5_esw_vport_match_metadata_supported(esw))
-               esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
-
-       dev->priv.eswitch = esw;
        BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
 
        esw_info(dev,
@@ -1908,6 +1904,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 
 reps_err:
        mlx5_esw_vports_cleanup(esw);
+       dev->priv.eswitch = NULL;
 abort:
        if (esw->work_queue)
                destroy_workqueue(esw->work_queue);
@@ -1926,7 +1923,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 
        esw_info(esw->dev, "cleanup\n");
 
-       esw->dev->priv.eswitch = NULL;
        destroy_workqueue(esw->work_queue);
        WARN_ON(refcount_read(&esw->qos.refcnt));
        mutex_destroy(&esw->state_lock);
@@ -1937,6 +1933,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
        mutex_destroy(&esw->offloads.encap_tbl_lock);
        mutex_destroy(&esw->offloads.decap_tbl_lock);
        esw_offloads_cleanup(esw);
+       esw->dev->priv.eswitch = NULL;
        mlx5_esw_vports_cleanup(esw);
        debugfs_remove_recursive(esw->debugfs_root);
        devl_params_unregister(priv_to_devlink(esw->dev), mlx5_eswitch_params,
index baaae628b0a0f6510e2c350cbab0b6309b32da52..844d3e3a65ddf04c6e326127b1b1c05ed351b3a7 100644 (file)
@@ -43,6 +43,7 @@
 #include "rdma.h"
 #include "en.h"
 #include "fs_core.h"
+#include "lib/mlx5.h"
 #include "lib/devcom.h"
 #include "lib/eq.h"
 #include "lib/fs_chains.h"
@@ -2476,6 +2477,10 @@ int esw_offloads_init(struct mlx5_eswitch *esw)
        if (err)
                return err;
 
+       if (MLX5_ESWITCH_MANAGER(esw->dev) &&
+           mlx5_esw_vport_match_metadata_supported(esw))
+               esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA;
+
        err = devl_params_register(priv_to_devlink(esw->dev),
                                   esw_devlink_params,
                                   ARRAY_SIZE(esw_devlink_params));
@@ -3055,7 +3060,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key)
                                                     key,
                                                     mlx5_esw_offloads_devcom_event,
                                                     esw);
-       if (IS_ERR_OR_NULL(esw->devcom))
+       if (IS_ERR(esw->devcom))
                return;
 
        mlx5_devcom_send_event(esw->devcom,
@@ -3707,6 +3712,12 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode,
        if (esw_mode_from_devlink(mode, &mlx5_mode))
                return -EINVAL;
 
+       if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured.");
+               return -EPERM;
+       }
+
        mlx5_lag_disable_change(esw->dev);
        err = mlx5_esw_try_lock(esw);
        if (err < 0) {
index e6bfa7e4f146caf5b05506beaa6c9aabc6c4f74d..cf085a478e3e4c69ffdd4ee9bb24f0036e27c66d 100644 (file)
@@ -1664,6 +1664,16 @@ static int create_auto_flow_group(struct mlx5_flow_table *ft,
        return err;
 }
 
+static bool mlx5_pkt_reformat_cmp(struct mlx5_pkt_reformat *p1,
+                                 struct mlx5_pkt_reformat *p2)
+{
+       return p1->owner == p2->owner &&
+               (p1->owner == MLX5_FLOW_RESOURCE_OWNER_FW ?
+                p1->id == p2->id :
+                mlx5_fs_dr_action_get_pkt_reformat_id(p1) ==
+                mlx5_fs_dr_action_get_pkt_reformat_id(p2));
+}
+
 static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
                                struct mlx5_flow_destination *d2)
 {
@@ -1675,8 +1685,8 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
                     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_VHCA_ID) ?
                      (d1->vport.vhca_id == d2->vport.vhca_id) : true) &&
                     ((d1->vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID) ?
-                     (d1->vport.pkt_reformat->id ==
-                      d2->vport.pkt_reformat->id) : true)) ||
+                     mlx5_pkt_reformat_cmp(d1->vport.pkt_reformat,
+                                           d2->vport.pkt_reformat) : true)) ||
                    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
                     d1->ft == d2->ft) ||
                    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
@@ -1808,8 +1818,9 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
        }
        trace_mlx5_fs_set_fte(fte, false);
 
+       /* Link newly added rules into the tree. */
        for (i = 0; i < handle->num_rules; i++) {
-               if (refcount_read(&handle->rule[i]->node.refcount) == 1) {
+               if (!handle->rule[i]->node.parent) {
                        tree_add_node(&handle->rule[i]->node, &fte->node);
                        trace_mlx5_fs_add_rule(handle->rule[i]);
                }
index d14459e5c04fc515ad682e11ee322aa3891e382f..69d482f7c5a29916688ac0d79d324df5f2596586 100644 (file)
@@ -703,8 +703,10 @@ int mlx5_deactivate_lag(struct mlx5_lag *ldev)
                return err;
        }
 
-       if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags))
+       if (test_bit(MLX5_LAG_MODE_FLAG_HASH_BASED, &flags)) {
                mlx5_lag_port_sel_destroy(ldev);
+               ldev->buckets = 1;
+       }
        if (mlx5_lag_has_drop_rule(ldev))
                mlx5_lag_drop_rule_cleanup(ldev);
 
index e7d59cfa8708e1617f78b28974977a9588026d1f..7b0766c89f4cf0aac5560e9eb041e564d6531e65 100644 (file)
@@ -220,7 +220,7 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc,
        struct mlx5_devcom_comp *comp;
 
        if (IS_ERR_OR_NULL(devc))
-               return NULL;
+               return ERR_PTR(-EINVAL);
 
        mutex_lock(&comp_list_lock);
        comp = devcom_component_get(devc, id, key, handler);
index 5b28084e8a03c77936e180a06246f9ef0a8dc4bd..dd5d186dc6148f065b986ee5d2363940314816db 100644 (file)
@@ -213,8 +213,8 @@ static int sd_register(struct mlx5_core_dev *dev)
        sd = mlx5_get_sd(dev);
        devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP,
                                                sd->group_id, NULL, dev);
-       if (!devcom)
-               return -ENOMEM;
+       if (IS_ERR(devcom))
+               return PTR_ERR(devcom);
 
        sd->devcom = devcom;
 
index c2593625c09ad6a9150e03baeda0ae41a1a010be..331ce47f51a17a386213d88db9aa7b3cb15d6b41 100644 (file)
@@ -956,7 +956,7 @@ static void mlx5_register_hca_devcom_comp(struct mlx5_core_dev *dev)
                mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS,
                                               mlx5_query_nic_system_image_guid(dev),
                                               NULL, dev);
-       if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp))
+       if (IS_ERR(dev->priv.hca_devcom_comp))
                mlx5_core_err(dev, "Failed to register devcom HCA component\n");
 }
 
@@ -1480,6 +1480,14 @@ int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev)
        if (err)
                goto err_register;
 
+       err = mlx5_crdump_enable(dev);
+       if (err)
+               mlx5_core_err(dev, "mlx5_crdump_enable failed with error code %d\n", err);
+
+       err = mlx5_hwmon_dev_register(dev);
+       if (err)
+               mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
+
        mutex_unlock(&dev->intf_state_mutex);
        return 0;
 
@@ -1505,7 +1513,10 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
        int err;
 
        devl_lock(devlink);
+       devl_register(devlink);
        err = mlx5_init_one_devl_locked(dev);
+       if (err)
+               devl_unregister(devlink);
        devl_unlock(devlink);
        return err;
 }
@@ -1517,6 +1528,8 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
        devl_lock(devlink);
        mutex_lock(&dev->intf_state_mutex);
 
+       mlx5_hwmon_dev_unregister(dev);
+       mlx5_crdump_disable(dev);
        mlx5_unregister_device(dev);
 
        if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1534,6 +1547,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
        mlx5_function_teardown(dev, true);
 out:
        mutex_unlock(&dev->intf_state_mutex);
+       devl_unregister(devlink);
        devl_unlock(devlink);
 }
 
@@ -1680,16 +1694,23 @@ int mlx5_init_one_light(struct mlx5_core_dev *dev)
        }
 
        devl_lock(devlink);
+       devl_register(devlink);
+
        err = mlx5_devlink_params_register(priv_to_devlink(dev));
-       devl_unlock(devlink);
        if (err) {
                mlx5_core_warn(dev, "mlx5_devlink_param_reg err = %d\n", err);
-               goto query_hca_caps_err;
+               goto params_reg_err;
        }
 
+       devl_unlock(devlink);
        return 0;
 
+params_reg_err:
+       devl_unregister(devlink);
+       devl_unlock(devlink);
 query_hca_caps_err:
+       devl_unregister(devlink);
+       devl_unlock(devlink);
        mlx5_function_disable(dev, true);
 out:
        dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
@@ -1702,6 +1723,7 @@ void mlx5_uninit_one_light(struct mlx5_core_dev *dev)
 
        devl_lock(devlink);
        mlx5_devlink_params_unregister(priv_to_devlink(dev));
+       devl_unregister(devlink);
        devl_unlock(devlink);
        if (dev->state != MLX5_DEVICE_STATE_UP)
                return;
@@ -1943,16 +1965,7 @@ static int probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_init_one;
        }
 
-       err = mlx5_crdump_enable(dev);
-       if (err)
-               dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
-
-       err = mlx5_hwmon_dev_register(dev);
-       if (err)
-               mlx5_core_err(dev, "mlx5_hwmon_dev_register failed with error code %d\n", err);
-
        pci_save_state(pdev);
-       devlink_register(devlink);
        return 0;
 
 err_init_one:
@@ -1973,16 +1986,9 @@ static void remove_one(struct pci_dev *pdev)
        struct devlink *devlink = priv_to_devlink(dev);
 
        set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
-       /* mlx5_drain_fw_reset() and mlx5_drain_health_wq() are using
-        * devlink notify APIs.
-        * Hence, we must drain them before unregistering the devlink.
-        */
        mlx5_drain_fw_reset(dev);
        mlx5_drain_health_wq(dev);
-       devlink_unregister(devlink);
        mlx5_sriov_disable(pdev, false);
-       mlx5_hwmon_dev_unregister(dev);
-       mlx5_crdump_disable(dev);
        mlx5_uninit_one(dev);
        mlx5_pci_close(dev);
        mlx5_mdev_uninit(dev);
index 4dcf995cb1a2042c39938ee2f166a6c3d3e6ef24..6bac8ad70ba60bf9982a110f7e115183858e0497 100644 (file)
@@ -19,6 +19,7 @@
 #define MLX5_IRQ_CTRL_SF_MAX 8
 /* min num of vectors for SFs to be enabled */
 #define MLX5_IRQ_VEC_COMP_BASE_SF 2
+#define MLX5_IRQ_VEC_COMP_BASE 1
 
 #define MLX5_EQ_SHARE_IRQ_MAX_COMP (8)
 #define MLX5_EQ_SHARE_IRQ_MAX_CTRL (UINT_MAX)
@@ -246,6 +247,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
                return;
        }
 
+       vecidx -= MLX5_IRQ_VEC_COMP_BASE;
        snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", vecidx);
 }
 
@@ -585,7 +587,7 @@ struct mlx5_irq *mlx5_irq_request_vector(struct mlx5_core_dev *dev, u16 cpu,
        struct mlx5_irq_table *table = mlx5_irq_table_get(dev);
        struct mlx5_irq_pool *pool = table->pcif_pool;
        struct irq_affinity_desc af_desc;
-       int offset = 1;
+       int offset = MLX5_IRQ_VEC_COMP_BASE;
 
        if (!pool->xa_num_irqs.max)
                offset = 0;
index bc863e1f062e6bd316f6b54f87850e11123bbfea..7ebe712808275a7a1db290040d86c2cd5983c9d7 100644 (file)
@@ -75,7 +75,6 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia
                goto peer_devlink_set_err;
        }
 
-       devlink_register(devlink);
        return 0;
 
 peer_devlink_set_err:
@@ -101,7 +100,6 @@ static void mlx5_sf_dev_remove(struct auxiliary_device *adev)
        devlink = priv_to_devlink(mdev);
        set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state);
        mlx5_drain_health_wq(mdev);
-       devlink_unregister(devlink);
        if (mlx5_dev_is_lightweight(mdev))
                mlx5_uninit_one_light(mdev);
        else
index 64f4cc284aea41715abecb1167439efe401951f8..030a5776c937406540645462b5950cd209c37974 100644 (file)
@@ -205,12 +205,11 @@ dr_dump_hex_print(char hex[DR_HEX_SIZE], char *src, u32 size)
 }
 
 static int
-dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
+dr_dump_rule_action_mem(struct seq_file *file, char *buff, const u64 rule_id,
                        struct mlx5dr_rule_action_member *action_mem)
 {
        struct mlx5dr_action *action = action_mem->action;
        const u64 action_id = DR_DBG_PTR_TO_ID(action);
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        u64 hit_tbl_ptr, miss_tbl_ptr;
        u32 hit_tbl_id, miss_tbl_id;
        int ret;
@@ -488,10 +487,9 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id,
 }
 
 static int
-dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
+dr_dump_rule_mem(struct seq_file *file, char *buff, struct mlx5dr_ste *ste,
                 bool is_rx, const u64 rule_id, u8 format_ver)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        char hw_ste_dump[DR_HEX_SIZE];
        u32 mem_rec_type;
        int ret;
@@ -522,7 +520,8 @@ dr_dump_rule_mem(struct seq_file *file, struct mlx5dr_ste *ste,
 }
 
 static int
-dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
+dr_dump_rule_rx_tx(struct seq_file *file, char *buff,
+                  struct mlx5dr_rule_rx_tx *rule_rx_tx,
                   bool is_rx, const u64 rule_id, u8 format_ver)
 {
        struct mlx5dr_ste *ste_arr[DR_RULE_MAX_STES + DR_ACTION_MAX_STES];
@@ -533,7 +532,7 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
                return 0;
 
        while (i--) {
-               ret = dr_dump_rule_mem(file, ste_arr[i], is_rx, rule_id,
+               ret = dr_dump_rule_mem(file, buff, ste_arr[i], is_rx, rule_id,
                                       format_ver);
                if (ret < 0)
                        return ret;
@@ -542,7 +541,8 @@ dr_dump_rule_rx_tx(struct seq_file *file, struct mlx5dr_rule_rx_tx *rule_rx_tx,
        return 0;
 }
 
-static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
+static noinline_for_stack int
+dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
 {
        struct mlx5dr_rule_action_member *action_mem;
        const u64 rule_id = DR_DBG_PTR_TO_ID(rule);
@@ -565,19 +565,19 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
                return ret;
 
        if (rx->nic_matcher) {
-               ret = dr_dump_rule_rx_tx(file, rx, true, rule_id, format_ver);
+               ret = dr_dump_rule_rx_tx(file, buff, rx, true, rule_id, format_ver);
                if (ret < 0)
                        return ret;
        }
 
        if (tx->nic_matcher) {
-               ret = dr_dump_rule_rx_tx(file, tx, false, rule_id, format_ver);
+               ret = dr_dump_rule_rx_tx(file, buff, tx, false, rule_id, format_ver);
                if (ret < 0)
                        return ret;
        }
 
        list_for_each_entry(action_mem, &rule->rule_actions_list, list) {
-               ret = dr_dump_rule_action_mem(file, rule_id, action_mem);
+               ret = dr_dump_rule_action_mem(file, buff, rule_id, action_mem);
                if (ret < 0)
                        return ret;
        }
@@ -586,10 +586,10 @@ static int dr_dump_rule(struct seq_file *file, struct mlx5dr_rule *rule)
 }
 
 static int
-dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
+dr_dump_matcher_mask(struct seq_file *file, char *buff,
+                    struct mlx5dr_match_param *mask,
                     u8 criteria, const u64 matcher_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        char dump[DR_HEX_SIZE];
        int ret;
 
@@ -681,10 +681,10 @@ dr_dump_matcher_mask(struct seq_file *file, struct mlx5dr_match_param *mask,
 }
 
 static int
-dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
+dr_dump_matcher_builder(struct seq_file *file, char *buff,
+                       struct mlx5dr_ste_build *builder,
                        u32 index, bool is_rx, const u64 matcher_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        int ret;
 
        ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -702,11 +702,10 @@ dr_dump_matcher_builder(struct seq_file *file, struct mlx5dr_ste_build *builder,
 }
 
 static int
-dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_matcher_rx_tx(struct seq_file *file, char *buff, bool is_rx,
                      struct mlx5dr_matcher_rx_tx *matcher_rx_tx,
                      const u64 matcher_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        enum dr_dump_rec_type rec_type;
        u64 s_icm_addr, e_icm_addr;
        int i, ret;
@@ -731,7 +730,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
                return ret;
 
        for (i = 0; i < matcher_rx_tx->num_of_builders; i++) {
-               ret = dr_dump_matcher_builder(file,
+               ret = dr_dump_matcher_builder(file, buff,
                                              &matcher_rx_tx->ste_builder[i],
                                              i, is_rx, matcher_id);
                if (ret < 0)
@@ -741,7 +740,7 @@ dr_dump_matcher_rx_tx(struct seq_file *file, bool is_rx,
        return 0;
 }
 
-static int
+static noinline_for_stack int
 dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
 {
        struct mlx5dr_matcher_rx_tx *rx = &matcher->rx;
@@ -763,19 +762,19 @@ dr_dump_matcher(struct seq_file *file, struct mlx5dr_matcher *matcher)
        if (ret)
                return ret;
 
-       ret = dr_dump_matcher_mask(file, &matcher->mask,
+       ret = dr_dump_matcher_mask(file, buff, &matcher->mask,
                                   matcher->match_criteria, matcher_id);
        if (ret < 0)
                return ret;
 
        if (rx->nic_tbl) {
-               ret = dr_dump_matcher_rx_tx(file, true, rx, matcher_id);
+               ret = dr_dump_matcher_rx_tx(file, buff, true, rx, matcher_id);
                if (ret < 0)
                        return ret;
        }
 
        if (tx->nic_tbl) {
-               ret = dr_dump_matcher_rx_tx(file, false, tx, matcher_id);
+               ret = dr_dump_matcher_rx_tx(file, buff, false, tx, matcher_id);
                if (ret < 0)
                        return ret;
        }
@@ -803,11 +802,10 @@ dr_dump_matcher_all(struct seq_file *file, struct mlx5dr_matcher *matcher)
 }
 
 static int
-dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
+dr_dump_table_rx_tx(struct seq_file *file, char *buff, bool is_rx,
                    struct mlx5dr_table_rx_tx *table_rx_tx,
                    const u64 table_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        enum dr_dump_rec_type rec_type;
        u64 s_icm_addr;
        int ret;
@@ -829,7 +827,8 @@ dr_dump_table_rx_tx(struct seq_file *file, bool is_rx,
        return 0;
 }
 
-static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
+static noinline_for_stack int
+dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
 {
        struct mlx5dr_table_rx_tx *rx = &table->rx;
        struct mlx5dr_table_rx_tx *tx = &table->tx;
@@ -848,14 +847,14 @@ static int dr_dump_table(struct seq_file *file, struct mlx5dr_table *table)
                return ret;
 
        if (rx->nic_dmn) {
-               ret = dr_dump_table_rx_tx(file, true, rx,
+               ret = dr_dump_table_rx_tx(file, buff, true, rx,
                                          DR_DBG_PTR_TO_ID(table));
                if (ret < 0)
                        return ret;
        }
 
        if (tx->nic_dmn) {
-               ret = dr_dump_table_rx_tx(file, false, tx,
+               ret = dr_dump_table_rx_tx(file, buff, false, tx,
                                          DR_DBG_PTR_TO_ID(table));
                if (ret < 0)
                        return ret;
@@ -881,10 +880,10 @@ static int dr_dump_table_all(struct seq_file *file, struct mlx5dr_table *tbl)
 }
 
 static int
-dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
+dr_dump_send_ring(struct seq_file *file, char *buff,
+                 struct mlx5dr_send_ring *ring,
                  const u64 domain_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        int ret;
 
        ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -902,13 +901,13 @@ dr_dump_send_ring(struct seq_file *file, struct mlx5dr_send_ring *ring,
        return 0;
 }
 
-static noinline_for_stack int
+static int
 dr_dump_domain_info_flex_parser(struct seq_file *file,
+                               char *buff,
                                const char *flex_parser_name,
                                const u8 flex_parser_value,
                                const u64 domain_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        int ret;
 
        ret = snprintf(buff, MLX5DR_DEBUG_DUMP_BUFF_LENGTH,
@@ -925,11 +924,11 @@ dr_dump_domain_info_flex_parser(struct seq_file *file,
        return 0;
 }
 
-static noinline_for_stack int
-dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
+static int
+dr_dump_domain_info_caps(struct seq_file *file, char *buff,
+                        struct mlx5dr_cmd_caps *caps,
                         const u64 domain_id)
 {
-       char buff[MLX5DR_DEBUG_DUMP_BUFF_LENGTH];
        struct mlx5dr_cmd_vport_cap *vport_caps;
        unsigned long i, vports_num;
        int ret;
@@ -969,34 +968,35 @@ dr_dump_domain_info_caps(struct seq_file *file, struct mlx5dr_cmd_caps *caps,
 }
 
 static int
-dr_dump_domain_info(struct seq_file *file, struct mlx5dr_domain_info *info,
+dr_dump_domain_info(struct seq_file *file, char *buff,
+                   struct mlx5dr_domain_info *info,
                    const u64 domain_id)
 {
        int ret;
 
-       ret = dr_dump_domain_info_caps(file, &info->caps, domain_id);
+       ret = dr_dump_domain_info_caps(file, buff, &info->caps, domain_id);
        if (ret < 0)
                return ret;
 
-       ret = dr_dump_domain_info_flex_parser(file, "icmp_dw0",
+       ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw0",
                                              info->caps.flex_parser_id_icmp_dw0,
                                              domain_id);
        if (ret < 0)
                return ret;
 
-       ret = dr_dump_domain_info_flex_parser(file, "icmp_dw1",
+       ret = dr_dump_domain_info_flex_parser(file, buff, "icmp_dw1",
                                              info->caps.flex_parser_id_icmp_dw1,
                                              domain_id);
        if (ret < 0)
                return ret;
 
-       ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw0",
+       ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw0",
                                              info->caps.flex_parser_id_icmpv6_dw0,
                                              domain_id);
        if (ret < 0)
                return ret;
 
-       ret = dr_dump_domain_info_flex_parser(file, "icmpv6_dw1",
+       ret = dr_dump_domain_info_flex_parser(file, buff, "icmpv6_dw1",
                                              info->caps.flex_parser_id_icmpv6_dw1,
                                              domain_id);
        if (ret < 0)
@@ -1032,12 +1032,12 @@ dr_dump_domain(struct seq_file *file, struct mlx5dr_domain *dmn)
        if (ret)
                return ret;
 
-       ret = dr_dump_domain_info(file, &dmn->info, domain_id);
+       ret = dr_dump_domain_info(file, buff, &dmn->info, domain_id);
        if (ret < 0)
                return ret;
 
        if (dmn->info.supp_sw_steering) {
-               ret = dr_dump_send_ring(file, dmn->send_ring, domain_id);
+               ret = dr_dump_send_ring(file, buff, dmn->send_ring, domain_id);
                if (ret < 0)
                        return ret;
        }
index e5ec0a363aff84d44470b8cf2374ef36a98c6b06..31f75b4a67fd79eb1c7c08096a5a11a163a96b73 100644 (file)
@@ -368,7 +368,6 @@ union ks8851_tx_hdr {
  * @rdfifo: FIFO read callback
  * @wrfifo: FIFO write callback
  * @start_xmit: start_xmit() implementation callback
- * @rx_skb: rx_skb() implementation callback
  * @flush_tx_work: flush_tx_work() implementation callback
  *
  * The @statelock is used to protect information in the structure which may
@@ -423,8 +422,6 @@ struct ks8851_net {
                                          struct sk_buff *txp, bool irq);
        netdev_tx_t             (*start_xmit)(struct sk_buff *skb,
                                              struct net_device *dev);
-       void                    (*rx_skb)(struct ks8851_net *ks,
-                                         struct sk_buff *skb);
        void                    (*flush_tx_work)(struct ks8851_net *ks);
 };
 
index 0bf13b38b8f5b907b464649422331421337d8411..d4cdf3d4f55257ad5ace878f87abf5d96f67b001 100644 (file)
@@ -231,16 +231,6 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
                   rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]);
 }
 
-/**
- * ks8851_rx_skb - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
-{
-       ks->rx_skb(ks, skb);
-}
-
 /**
  * ks8851_rx_pkts - receive packets from the host
  * @ks: The device information.
@@ -309,7 +299,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
                                        ks8851_dbg_dumpkkt(ks, rxpkt);
 
                                skb->protocol = eth_type_trans(skb, ks->netdev);
-                               ks8851_rx_skb(ks, skb);
+                               __netif_rx(skb);
 
                                ks->netdev->stats.rx_packets++;
                                ks->netdev->stats.rx_bytes += rxlen;
@@ -340,6 +330,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
        unsigned long flags;
        unsigned int status;
 
+       local_bh_disable();
+
        ks8851_lock(ks, &flags);
 
        status = ks8851_rdreg16(ks, KS_ISR);
@@ -416,6 +408,8 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
        if (status & IRQ_LCI)
                mii_check_link(&ks->mii);
 
+       local_bh_enable();
+
        return IRQ_HANDLED;
 }
 
index 2a7f29854267030d3503a477fc5aef361761cdd8..381b9cd285ebd0bd3f5cf1e568e9b23cf0ba9c85 100644 (file)
@@ -210,16 +210,6 @@ static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp,
        iowrite16_rep(ksp->hw_addr, txp->data, len / 2);
 }
 
-/**
- * ks8851_rx_skb_par - receive skbuff
- * @ks: The device state.
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb)
-{
-       netif_rx(skb);
-}
-
 static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks)
 {
        return ks8851_rdreg16_par(ks, KS_TXQCR);
@@ -298,7 +288,6 @@ static int ks8851_probe_par(struct platform_device *pdev)
        ks->rdfifo = ks8851_rdfifo_par;
        ks->wrfifo = ks8851_wrfifo_par;
        ks->start_xmit = ks8851_start_xmit_par;
-       ks->rx_skb = ks8851_rx_skb_par;
 
 #define STD_IRQ (IRQ_LCI |     /* Link Change */       \
                 IRQ_RXI |      /* RX done */           \
index 2f803377c9f9dd916153fbac269423006959e239..670c1de966db88030332a3da1d99416284a757a2 100644 (file)
@@ -298,16 +298,6 @@ static unsigned int calc_txlen(unsigned int len)
        return ALIGN(len + 4, 4);
 }
 
-/**
- * ks8851_rx_skb_spi - receive skbuff
- * @ks: The device state
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
-{
-       netif_rx(skb);
-}
-
 /**
  * ks8851_tx_work - process tx packet(s)
  * @work: The work strucutre what was scheduled.
@@ -435,7 +425,6 @@ static int ks8851_probe_spi(struct spi_device *spi)
        ks->rdfifo = ks8851_rdfifo_spi;
        ks->wrfifo = ks8851_wrfifo_spi;
        ks->start_xmit = ks8851_start_xmit_spi;
-       ks->rx_skb = ks8851_rx_skb_spi;
        ks->flush_tx_work = ks8851_flush_tx_work_spi;
 
 #define STD_IRQ (IRQ_LCI |     /* Link Change */       \
index 3a1b1a1f5a1951069f9c3e5ee5e3a10c1be55eb6..60dd2fd603a8554f02f5d649e8d290dc074b5d72 100644 (file)
@@ -731,7 +731,7 @@ static int sparx5_port_pcs_low_set(struct sparx5 *sparx5,
        bool sgmii = false, inband_aneg = false;
        int err;
 
-       if (port->conf.inband) {
+       if (conf->inband) {
                if (conf->portmode == PHY_INTERFACE_MODE_SGMII ||
                    conf->portmode == PHY_INTERFACE_MODE_QSGMII)
                        inband_aneg = true; /* Cisco-SGMII in-band-aneg */
@@ -948,7 +948,7 @@ int sparx5_port_pcs_set(struct sparx5 *sparx5,
        if (err)
                return -EINVAL;
 
-       if (port->conf.inband) {
+       if (conf->inband) {
                /* Enable/disable 1G counters in ASM */
                spx5_rmw(ASM_PORT_CFG_CSC_STAT_DIS_SET(high_speed_dev),
                         ASM_PORT_CFG_CSC_STAT_DIS,
index 523e0c470894f7fdcf8a995fb821ff146f08fcd9..55f255a3c9db69b92d5743bd42c34cbaba46a0a8 100644 (file)
@@ -36,6 +36,27 @@ struct sparx5_tc_flower_template {
        u16 l3_proto; /* protocol specified in the template */
 };
 
+/* SparX-5 VCAP fragment types:
+ * 0 = no fragment, 1 = initial fragment,
+ * 2 = suspicious fragment, 3 = valid follow-up fragment
+ */
+enum {                   /* key / mask */
+       FRAG_NOT   = 0x03, /* 0 / 3 */
+       FRAG_SOME  = 0x11, /* 1 / 1 */
+       FRAG_FIRST = 0x13, /* 1 / 3 */
+       FRAG_LATER = 0x33, /* 3 / 3 */
+       FRAG_INVAL = 0xff, /* invalid */
+};
+
+/* Flower fragment flag to VCAP fragment type mapping */
+static const u8 sparx5_vcap_frag_map[4][4] = {           /* is_frag */
+       { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_FIRST }, /* 0/0 */
+       { FRAG_NOT,   FRAG_NOT,   FRAG_INVAL, FRAG_INVAL }, /* 0/1 */
+       { FRAG_INVAL, FRAG_INVAL, FRAG_INVAL, FRAG_INVAL }, /* 1/0 */
+       { FRAG_SOME,  FRAG_LATER, FRAG_INVAL, FRAG_FIRST }  /* 1/1 */
+       /* 0/0        0/1         1/0         1/1 <-- first_frag */
+};
+
 static int
 sparx5_tc_flower_es0_tpid(struct vcap_tc_flower_parse_usage *st)
 {
@@ -145,29 +166,27 @@ sparx5_tc_flower_handler_control_usage(struct vcap_tc_flower_parse_usage *st)
        flow_rule_match_control(st->frule, &mt);
 
        if (mt.mask->flags) {
-               if (mt.mask->flags & FLOW_DIS_FIRST_FRAG) {
-                       if (mt.key->flags & FLOW_DIS_FIRST_FRAG) {
-                               value = 1; /* initial fragment */
-                               mask = 0x3;
-                       } else {
-                               if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
-                                       value = 3; /* follow up fragment */
-                                       mask = 0x3;
-                               } else {
-                                       value = 0; /* no fragment */
-                                       mask = 0x3;
-                               }
-                       }
-               } else {
-                       if (mt.mask->flags & FLOW_DIS_IS_FRAGMENT) {
-                               value = 3; /* follow up fragment */
-                               mask = 0x3;
-                       } else {
-                               value = 0; /* no fragment */
-                               mask = 0x3;
-                       }
+               u8 is_frag_key = !!(mt.key->flags & FLOW_DIS_IS_FRAGMENT);
+               u8 is_frag_mask = !!(mt.mask->flags & FLOW_DIS_IS_FRAGMENT);
+               u8 is_frag_idx = (is_frag_key << 1) | is_frag_mask;
+
+               u8 first_frag_key = !!(mt.key->flags & FLOW_DIS_FIRST_FRAG);
+               u8 first_frag_mask = !!(mt.mask->flags & FLOW_DIS_FIRST_FRAG);
+               u8 first_frag_idx = (first_frag_key << 1) | first_frag_mask;
+
+               /* Lookup verdict based on the 2 + 2 input bits */
+               u8 vdt = sparx5_vcap_frag_map[is_frag_idx][first_frag_idx];
+
+               if (vdt == FRAG_INVAL) {
+                       NL_SET_ERR_MSG_MOD(st->fco->common.extack,
+                                          "Match on invalid fragment flag combination");
+                       return -EINVAL;
                }
 
+               /* Extract VCAP fragment key and mask from verdict */
+               value = (vdt >> 4) & 0x3;
+               mask = vdt & 0x3;
+
                err = vcap_rule_add_key_u32(st->vrule,
                                            VCAP_KF_L3_FRAGMENT_TYPE,
                                            value, mask);
index 4c043052198d470ce0a2f82dbe1f3be67b35c827..00882ffc7a029ef63c1c8605541f86339999e724 100644 (file)
@@ -73,6 +73,7 @@ enum mac_version {
 };
 
 struct rtl8169_private;
+struct r8169_led_classdev;
 
 void r8169_apply_firmware(struct rtl8169_private *tp);
 u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp);
@@ -84,7 +85,8 @@ void r8169_get_led_name(struct rtl8169_private *tp, int idx,
                        char *buf, int buf_len);
 int rtl8168_get_led_mode(struct rtl8169_private *tp);
 int rtl8168_led_mod_ctrl(struct rtl8169_private *tp, u16 mask, u16 val);
-void rtl8168_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev);
 int rtl8125_get_led_mode(struct rtl8169_private *tp, int index);
 int rtl8125_set_led_mode(struct rtl8169_private *tp, int index, u16 mode);
-void rtl8125_init_leds(struct net_device *ndev);
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev);
+void r8169_remove_leds(struct r8169_led_classdev *leds);
index 7c5dc9d0df855ef57592b7a25d4357232279a60f..e10bee706bc691b8c32ec2410baa8d4279de69a0 100644 (file)
@@ -146,22 +146,22 @@ static void rtl8168_setup_ldev(struct r8169_led_classdev *ldev,
        led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
 
        /* ignore errors */
-       devm_led_classdev_register(&ndev->dev, led_cdev);
+       led_classdev_register(&ndev->dev, led_cdev);
 }
 
-void rtl8168_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8168_init_leds(struct net_device *ndev)
 {
-       /* bind resource mgmt to netdev */
-       struct device *dev = &ndev->dev;
        struct r8169_led_classdev *leds;
        int i;
 
-       leds = devm_kcalloc(dev, RTL8168_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+       leds = kcalloc(RTL8168_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
        if (!leds)
-               return;
+               return NULL;
 
        for (i = 0; i < RTL8168_NUM_LEDS; i++)
                rtl8168_setup_ldev(leds + i, ndev, i);
+
+       return leds;
 }
 
 static int rtl8125_led_hw_control_is_supported(struct led_classdev *led_cdev,
@@ -245,20 +245,31 @@ static void rtl8125_setup_led_ldev(struct r8169_led_classdev *ldev,
        led_cdev->hw_control_get_device = r8169_led_hw_control_get_device;
 
        /* ignore errors */
-       devm_led_classdev_register(&ndev->dev, led_cdev);
+       led_classdev_register(&ndev->dev, led_cdev);
 }
 
-void rtl8125_init_leds(struct net_device *ndev)
+struct r8169_led_classdev *rtl8125_init_leds(struct net_device *ndev)
 {
-       /* bind resource mgmt to netdev */
-       struct device *dev = &ndev->dev;
        struct r8169_led_classdev *leds;
        int i;
 
-       leds = devm_kcalloc(dev, RTL8125_NUM_LEDS, sizeof(*leds), GFP_KERNEL);
+       leds = kcalloc(RTL8125_NUM_LEDS + 1, sizeof(*leds), GFP_KERNEL);
        if (!leds)
-               return;
+               return NULL;
 
        for (i = 0; i < RTL8125_NUM_LEDS; i++)
                rtl8125_setup_led_ldev(leds + i, ndev, i);
+
+       return leds;
+}
+
+void r8169_remove_leds(struct r8169_led_classdev *leds)
+{
+       if (!leds)
+               return;
+
+       for (struct r8169_led_classdev *l = leds; l->ndev; l++)
+               led_classdev_unregister(&l->led);
+
+       kfree(leds);
 }
index 6f1e6f386b7ba7277bc211765b8ef49d9fcd6750..0fc5fe564ae50be28bc6568f90d339d840a4b8d1 100644 (file)
@@ -647,6 +647,8 @@ struct rtl8169_private {
        const char *fw_name;
        struct rtl_fw *rtl_fw;
 
+       struct r8169_led_classdev *leds;
+
        u32 ocp_base;
 };
 
@@ -5044,6 +5046,9 @@ static void rtl_remove_one(struct pci_dev *pdev)
 
        cancel_work_sync(&tp->wk.work);
 
+       if (IS_ENABLED(CONFIG_R8169_LEDS))
+               r8169_remove_leds(tp->leds);
+
        unregister_netdev(tp->dev);
 
        if (tp->dash_type != RTL_DASH_NONE)
@@ -5501,9 +5506,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        if (IS_ENABLED(CONFIG_R8169_LEDS)) {
                if (rtl_is_8125(tp))
-                       rtl8125_init_leds(dev);
+                       tp->leds = rtl8125_init_leds(dev);
                else if (tp->mac_version > RTL_GIGA_MAC_VER_06)
-                       rtl8168_init_leds(dev);
+                       tp->leds = rtl8168_init_leds(dev);
        }
 
        netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
index ba01c8cc3c906d5ea9a02029dc76fabc243b277c..fcb756d77681cbaf2a17d3e21ddffa5543bc9a84 100644 (file)
@@ -769,25 +769,28 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
        dma_addr_t dma_addr;
        int rx_packets = 0;
        u8  desc_status;
-       u16 pkt_len;
+       u16 desc_len;
        u8  die_dt;
        int entry;
        int limit;
        int i;
 
-       entry = priv->cur_rx[q] % priv->num_rx_ring[q];
        limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
        stats = &priv->stats[q];
 
-       desc = &priv->rx_ring[q].desc[entry];
-       for (i = 0; i < limit && rx_packets < *quota && desc->die_dt != DT_FEMPTY; i++) {
+       for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+               entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+               desc = &priv->rx_ring[q].desc[entry];
+               if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+                       break;
+
                /* Descriptor type must be checked before all other reads */
                dma_rmb();
                desc_status = desc->msc;
-               pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
+               desc_len = le16_to_cpu(desc->ds_cc) & RX_DS;
 
                /* We use 0-byte descriptors to mark the DMA mapping errors */
-               if (!pkt_len)
+               if (!desc_len)
                        continue;
 
                if (desc_status & MSC_MC)
@@ -808,25 +811,25 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
                        switch (die_dt) {
                        case DT_FSINGLE:
                                skb = ravb_get_skb_gbeth(ndev, entry, desc);
-                               skb_put(skb, pkt_len);
+                               skb_put(skb, desc_len);
                                skb->protocol = eth_type_trans(skb, ndev);
                                if (ndev->features & NETIF_F_RXCSUM)
                                        ravb_rx_csum_gbeth(skb);
                                napi_gro_receive(&priv->napi[q], skb);
                                rx_packets++;
-                               stats->rx_bytes += pkt_len;
+                               stats->rx_bytes += desc_len;
                                break;
                        case DT_FSTART:
                                priv->rx_1st_skb = ravb_get_skb_gbeth(ndev, entry, desc);
-                               skb_put(priv->rx_1st_skb, pkt_len);
+                               skb_put(priv->rx_1st_skb, desc_len);
                                break;
                        case DT_FMID:
                                skb = ravb_get_skb_gbeth(ndev, entry, desc);
                                skb_copy_to_linear_data_offset(priv->rx_1st_skb,
                                                               priv->rx_1st_skb->len,
                                                               skb->data,
-                                                              pkt_len);
-                               skb_put(priv->rx_1st_skb, pkt_len);
+                                                              desc_len);
+                               skb_put(priv->rx_1st_skb, desc_len);
                                dev_kfree_skb(skb);
                                break;
                        case DT_FEND:
@@ -834,23 +837,20 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q)
                                skb_copy_to_linear_data_offset(priv->rx_1st_skb,
                                                               priv->rx_1st_skb->len,
                                                               skb->data,
-                                                              pkt_len);
-                               skb_put(priv->rx_1st_skb, pkt_len);
+                                                              desc_len);
+                               skb_put(priv->rx_1st_skb, desc_len);
                                dev_kfree_skb(skb);
                                priv->rx_1st_skb->protocol =
                                        eth_type_trans(priv->rx_1st_skb, ndev);
                                if (ndev->features & NETIF_F_RXCSUM)
-                                       ravb_rx_csum_gbeth(skb);
+                                       ravb_rx_csum_gbeth(priv->rx_1st_skb);
+                               stats->rx_bytes += priv->rx_1st_skb->len;
                                napi_gro_receive(&priv->napi[q],
                                                 priv->rx_1st_skb);
                                rx_packets++;
-                               stats->rx_bytes += pkt_len;
                                break;
                        }
                }
-
-               entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
-               desc = &priv->rx_ring[q].desc[entry];
        }
 
        /* Refill the RX ring buffers. */
@@ -891,30 +891,29 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
 {
        struct ravb_private *priv = netdev_priv(ndev);
        const struct ravb_hw_info *info = priv->info;
-       int entry = priv->cur_rx[q] % priv->num_rx_ring[q];
-       int boguscnt = (priv->dirty_rx[q] + priv->num_rx_ring[q]) -
-                       priv->cur_rx[q];
        struct net_device_stats *stats = &priv->stats[q];
        struct ravb_ex_rx_desc *desc;
+       unsigned int limit, i;
        struct sk_buff *skb;
        dma_addr_t dma_addr;
        struct timespec64 ts;
+       int rx_packets = 0;
        u8  desc_status;
        u16 pkt_len;
-       int limit;
+       int entry;
+
+       limit = priv->dirty_rx[q] + priv->num_rx_ring[q] - priv->cur_rx[q];
+       for (i = 0; i < limit; i++, priv->cur_rx[q]++) {
+               entry = priv->cur_rx[q] % priv->num_rx_ring[q];
+               desc = &priv->rx_ring[q].ex_desc[entry];
+               if (rx_packets == *quota || desc->die_dt == DT_FEMPTY)
+                       break;
 
-       boguscnt = min(boguscnt, *quota);
-       limit = boguscnt;
-       desc = &priv->rx_ring[q].ex_desc[entry];
-       while (desc->die_dt != DT_FEMPTY) {
                /* Descriptor type must be checked before all other reads */
                dma_rmb();
                desc_status = desc->msc;
                pkt_len = le16_to_cpu(desc->ds_cc) & RX_DS;
 
-               if (--boguscnt < 0)
-                       break;
-
                /* We use 0-byte descriptors to mark the DMA mapping errors */
                if (!pkt_len)
                        continue;
@@ -960,12 +959,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
                        if (ndev->features & NETIF_F_RXCSUM)
                                ravb_rx_csum(skb);
                        napi_gro_receive(&priv->napi[q], skb);
-                       stats->rx_packets++;
+                       rx_packets++;
                        stats->rx_bytes += pkt_len;
                }
-
-               entry = (++priv->cur_rx[q]) % priv->num_rx_ring[q];
-               desc = &priv->rx_ring[q].ex_desc[entry];
        }
 
        /* Refill the RX ring buffers. */
@@ -995,9 +991,9 @@ static bool ravb_rx_rcar(struct net_device *ndev, int *quota, int q)
                desc->die_dt = DT_FEMPTY;
        }
 
-       *quota -= limit - (++boguscnt);
-
-       return boguscnt <= 0;
+       stats->rx_packets += rx_packets;
+       *quota -= rx_packets;
+       return *quota == 0;
 }
 
 /* Packet receive function for Ethernet AVB */
index a6fefe675ef1520566ccdcafaac705f0ee159e42..3b7d4ac1e7be07cb2a0fc796f73b671ed535f01d 100644 (file)
@@ -553,6 +553,7 @@ extern const struct stmmac_hwtimestamp stmmac_ptp;
 extern const struct stmmac_mode_ops dwmac4_ring_mode_ops;
 
 struct mac_link {
+       u32 caps;
        u32 speed_mask;
        u32 speed10;
        u32 speed100;
index b21d99faa2d04c985427af61724dd073e3a2fe79..e1b761dcfa1dd56f2e5218312933eb1ea6bc06b1 100644 (file)
@@ -1096,6 +1096,8 @@ static struct mac_device_info *sun8i_dwmac_setup(void *ppriv)
 
        priv->dev->priv_flags |= IFF_UNICAST_FLT;
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_10 | MAC_100 | MAC_1000;
        /* The loopback bit seems to be re-set when link change
         * Simply mask it each time
         * Speed 10/100/1000 are set in BIT(2)/BIT(3)
index 3927609abc44110be97903aee12e25084473b80c..8555299443f4edf2475b95c1785544a1c3b73251 100644 (file)
@@ -539,6 +539,8 @@ int dwmac1000_setup(struct stmmac_priv *priv)
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_10 | MAC_100 | MAC_1000;
        mac->link.duplex = GMAC_CONTROL_DM;
        mac->link.speed10 = GMAC_CONTROL_PS;
        mac->link.speed100 = GMAC_CONTROL_PS | GMAC_CONTROL_FES;
index a6e8d7bd95886fc277c7e22c896ddf618e0fca97..7667d103cd0ebd9670a42360a095cfd322c8ebac 100644 (file)
@@ -175,6 +175,8 @@ int dwmac100_setup(struct stmmac_priv *priv)
        dev_info(priv->device, "\tDWMAC100\n");
 
        mac->pcsr = priv->ioaddr;
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_10 | MAC_100;
        mac->link.duplex = MAC_CONTROL_F;
        mac->link.speed10 = 0;
        mac->link.speed100 = 0;
index cef25efbdff99fdc07a313ab678869e83c85f79e..a38226d7cc6a99e45c39f62c81c56d8dc87a921a 100644 (file)
@@ -70,7 +70,10 @@ static void dwmac4_core_init(struct mac_device_info *hw,
 
 static void dwmac4_phylink_get_caps(struct stmmac_priv *priv)
 {
-       priv->phylink_config.mac_capabilities |= MAC_2500FD;
+       if (priv->plat->tx_queues_to_use > 1)
+               priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD | MAC_1000HD);
+       else
+               priv->hw->link.caps |= (MAC_10HD | MAC_100HD | MAC_1000HD);
 }
 
 static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
@@ -1378,6 +1381,8 @@ int dwmac4_setup(struct stmmac_priv *priv)
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_10 | MAC_100 | MAC_1000 | MAC_2500FD;
        mac->link.duplex = GMAC_CONFIG_DM;
        mac->link.speed10 = GMAC_CONFIG_PS;
        mac->link.speed100 = GMAC_CONFIG_FES | GMAC_CONFIG_PS;
index e841e312077ef0604c5b17a5473069cc4affadff..f8e7775bb63364c589da99cb4c954a38f4411567 100644 (file)
@@ -47,14 +47,6 @@ static void dwxgmac2_core_init(struct mac_device_info *hw,
        writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN);
 }
 
-static void xgmac_phylink_get_caps(struct stmmac_priv *priv)
-{
-       priv->phylink_config.mac_capabilities |= MAC_2500FD | MAC_5000FD |
-                                                MAC_10000FD | MAC_25000FD |
-                                                MAC_40000FD | MAC_50000FD |
-                                                MAC_100000FD;
-}
-
 static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable)
 {
        u32 tx = readl(ioaddr + XGMAC_TX_CONFIG);
@@ -1540,7 +1532,6 @@ static void dwxgmac3_fpe_configure(void __iomem *ioaddr, struct stmmac_fpe_cfg *
 
 const struct stmmac_ops dwxgmac210_ops = {
        .core_init = dwxgmac2_core_init,
-       .phylink_get_caps = xgmac_phylink_get_caps,
        .set_mac = dwxgmac2_set_mac,
        .rx_ipc = dwxgmac2_rx_ipc,
        .rx_queue_enable = dwxgmac2_rx_queue_enable,
@@ -1601,7 +1592,6 @@ static void dwxlgmac2_rx_queue_enable(struct mac_device_info *hw, u8 mode,
 
 const struct stmmac_ops dwxlgmac2_ops = {
        .core_init = dwxgmac2_core_init,
-       .phylink_get_caps = xgmac_phylink_get_caps,
        .set_mac = dwxgmac2_set_mac,
        .rx_ipc = dwxgmac2_rx_ipc,
        .rx_queue_enable = dwxlgmac2_rx_queue_enable,
@@ -1661,6 +1651,9 @@ int dwxgmac2_setup(struct stmmac_priv *priv)
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_1000FD | MAC_2500FD | MAC_5000FD |
+                        MAC_10000FD;
        mac->link.duplex = 0;
        mac->link.speed10 = XGMAC_CONFIG_SS_10_MII;
        mac->link.speed100 = XGMAC_CONFIG_SS_100_MII;
@@ -1698,6 +1691,11 @@ int dwxlgmac2_setup(struct stmmac_priv *priv)
        if (mac->multicast_filter_bins)
                mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins);
 
+       mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
+                        MAC_1000FD | MAC_2500FD | MAC_5000FD |
+                        MAC_10000FD | MAC_25000FD |
+                        MAC_40000FD | MAC_50000FD |
+                        MAC_100000FD;
        mac->link.duplex = 0;
        mac->link.speed1000 = XLGMAC_CONFIG_SS_1000;
        mac->link.speed2500 = XLGMAC_CONFIG_SS_2500;
index dff02d75d519713e61ffa5db651ece91242aece0..5d1ea3e07459a390f2d561b4147c7c0c3f33e4ce 100644 (file)
@@ -52,6 +52,7 @@ struct stmmac_counters {
        unsigned int mmc_tx_excessdef;
        unsigned int mmc_tx_pause_frame;
        unsigned int mmc_tx_vlan_frame_g;
+       unsigned int mmc_tx_oversize_g;
        unsigned int mmc_tx_lpi_usec;
        unsigned int mmc_tx_lpi_tran;
 
@@ -80,6 +81,7 @@ struct stmmac_counters {
        unsigned int mmc_rx_fifo_overflow;
        unsigned int mmc_rx_vlan_frames_gb;
        unsigned int mmc_rx_watchdog_error;
+       unsigned int mmc_rx_error;
        unsigned int mmc_rx_lpi_usec;
        unsigned int mmc_rx_lpi_tran;
        unsigned int mmc_rx_discard_frames_gb;
index 7eb477faa75a3853e7698e5a4aab3376d5d162e9..0fab842902a850022a3be368d4972e4f4e9bcdc9 100644 (file)
@@ -53,6 +53,7 @@
 #define MMC_TX_EXCESSDEF               0x6c
 #define MMC_TX_PAUSE_FRAME             0x70
 #define MMC_TX_VLAN_FRAME_G            0x74
+#define MMC_TX_OVERSIZE_G              0x78
 
 /* MMC RX counter registers */
 #define MMC_RX_FRAMECOUNT_GB           0x80
 #define MMC_RX_FIFO_OVERFLOW           0xd4
 #define MMC_RX_VLAN_FRAMES_GB          0xd8
 #define MMC_RX_WATCHDOG_ERROR          0xdc
+#define MMC_RX_ERROR                   0xe0
+
+#define MMC_TX_LPI_USEC                        0xec
+#define MMC_TX_LPI_TRAN                        0xf0
+#define MMC_RX_LPI_USEC                        0xf4
+#define MMC_RX_LPI_TRAN                        0xf8
+
 /* IPC*/
 #define MMC_RX_IPC_INTR_MASK           0x100
 #define MMC_RX_IPC_INTR                        0x108
@@ -283,6 +291,9 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
        mmc->mmc_tx_excessdef += readl(mmcaddr + MMC_TX_EXCESSDEF);
        mmc->mmc_tx_pause_frame += readl(mmcaddr + MMC_TX_PAUSE_FRAME);
        mmc->mmc_tx_vlan_frame_g += readl(mmcaddr + MMC_TX_VLAN_FRAME_G);
+       mmc->mmc_tx_oversize_g   += readl(mmcaddr + MMC_TX_OVERSIZE_G);
+       mmc->mmc_tx_lpi_usec += readl(mmcaddr + MMC_TX_LPI_USEC);
+       mmc->mmc_tx_lpi_tran += readl(mmcaddr + MMC_TX_LPI_TRAN);
 
        /* MMC RX counter registers */
        mmc->mmc_rx_framecount_gb += readl(mmcaddr + MMC_RX_FRAMECOUNT_GB);
@@ -316,6 +327,10 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
        mmc->mmc_rx_fifo_overflow += readl(mmcaddr + MMC_RX_FIFO_OVERFLOW);
        mmc->mmc_rx_vlan_frames_gb += readl(mmcaddr + MMC_RX_VLAN_FRAMES_GB);
        mmc->mmc_rx_watchdog_error += readl(mmcaddr + MMC_RX_WATCHDOG_ERROR);
+       mmc->mmc_rx_error += readl(mmcaddr + MMC_RX_ERROR);
+       mmc->mmc_rx_lpi_usec += readl(mmcaddr + MMC_RX_LPI_USEC);
+       mmc->mmc_rx_lpi_tran += readl(mmcaddr + MMC_RX_LPI_TRAN);
+
        /* IPv4 */
        mmc->mmc_rx_ipv4_gd += readl(mmcaddr + MMC_RX_IPV4_GD);
        mmc->mmc_rx_ipv4_hderr += readl(mmcaddr + MMC_RX_IPV4_HDERR);
index e1537a57815f387082acdb954ee6b40d397990c0..542e2633a6f52223bf15ef31fb17d2377fb4583c 100644 (file)
@@ -212,6 +212,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
        STMMAC_MMC_STAT(mmc_tx_excessdef),
        STMMAC_MMC_STAT(mmc_tx_pause_frame),
        STMMAC_MMC_STAT(mmc_tx_vlan_frame_g),
+       STMMAC_MMC_STAT(mmc_tx_oversize_g),
        STMMAC_MMC_STAT(mmc_tx_lpi_usec),
        STMMAC_MMC_STAT(mmc_tx_lpi_tran),
        STMMAC_MMC_STAT(mmc_rx_framecount_gb),
@@ -238,6 +239,7 @@ static const struct stmmac_stats stmmac_mmc[] = {
        STMMAC_MMC_STAT(mmc_rx_fifo_overflow),
        STMMAC_MMC_STAT(mmc_rx_vlan_frames_gb),
        STMMAC_MMC_STAT(mmc_rx_watchdog_error),
+       STMMAC_MMC_STAT(mmc_rx_error),
        STMMAC_MMC_STAT(mmc_rx_lpi_usec),
        STMMAC_MMC_STAT(mmc_rx_lpi_tran),
        STMMAC_MMC_STAT(mmc_rx_discard_frames_gb),
index 24cd80490d19cf86c2cd566b81f13b137109d784..7c6fb14b555508e4461980f99843ac461b323239 100644 (file)
@@ -1198,17 +1198,6 @@ static int stmmac_init_phy(struct net_device *dev)
        return ret;
 }
 
-static void stmmac_set_half_duplex(struct stmmac_priv *priv)
-{
-       /* Half-Duplex can only work with single tx queue */
-       if (priv->plat->tx_queues_to_use > 1)
-               priv->phylink_config.mac_capabilities &=
-                       ~(MAC_10HD | MAC_100HD | MAC_1000HD);
-       else
-               priv->phylink_config.mac_capabilities |=
-                       (MAC_10HD | MAC_100HD | MAC_1000HD);
-}
-
 static int stmmac_phy_setup(struct stmmac_priv *priv)
 {
        struct stmmac_mdio_bus_data *mdio_bus_data;
@@ -1236,15 +1225,11 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
                xpcs_get_interfaces(priv->hw->xpcs,
                                    priv->phylink_config.supported_interfaces);
 
-       priv->phylink_config.mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
-                                               MAC_10FD | MAC_100FD |
-                                               MAC_1000FD;
-
-       stmmac_set_half_duplex(priv);
-
        /* Get the MAC specific capabilities */
        stmmac_mac_phylink_get_caps(priv);
 
+       priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
        max_speed = priv->plat->max_speed;
        if (max_speed)
                phylink_limit_mac_speed(&priv->phylink_config, max_speed);
@@ -7342,6 +7327,7 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
        int ret = 0, i;
+       int max_speed;
 
        if (netif_running(dev))
                stmmac_release(dev);
@@ -7355,7 +7341,14 @@ int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt)
                        priv->rss.table[i] = ethtool_rxfh_indir_default(i,
                                                                        rx_cnt);
 
-       stmmac_set_half_duplex(priv);
+       stmmac_mac_phylink_get_caps(priv);
+
+       priv->phylink_config.mac_capabilities = priv->hw->link.caps;
+
+       max_speed = priv->plat->max_speed;
+       if (max_speed)
+               phylink_limit_mac_speed(&priv->phylink_config, max_speed);
+
        stmmac_napi_add(dev);
 
        if (netif_running(dev))
index 2939a21ca74f3cf0f627981df74a949e9c61011e..1d00e21808c1c36dde2fcd4e6a864ca1ecf72a0b 100644 (file)
@@ -2793,6 +2793,8 @@ static void am65_cpsw_unregister_devlink(struct am65_cpsw_common *common)
 
 static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
 {
+       struct am65_cpsw_rx_chn *rx_chan = &common->rx_chns;
+       struct am65_cpsw_tx_chn *tx_chan = common->tx_chns;
        struct device *dev = common->dev;
        struct am65_cpsw_port *port;
        int ret = 0, i;
@@ -2805,6 +2807,22 @@ static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
        if (ret)
                return ret;
 
+       /* The DMA Channels are not guaranteed to be in a clean state.
+        * Reset and disable them to ensure that they are back to the
+        * clean state and ready to be used.
+        */
+       for (i = 0; i < common->tx_ch_num; i++) {
+               k3_udma_glue_reset_tx_chn(tx_chan[i].tx_chn, &tx_chan[i],
+                                         am65_cpsw_nuss_tx_cleanup);
+               k3_udma_glue_disable_tx_chn(tx_chan[i].tx_chn);
+       }
+
+       for (i = 0; i < AM65_CPSW_MAX_RX_FLOWS; i++)
+               k3_udma_glue_reset_rx_chn(rx_chan->rx_chn, i, rx_chan,
+                                         am65_cpsw_nuss_rx_cleanup, !!i);
+
+       k3_udma_glue_disable_rx_chn(rx_chan->rx_chn);
+
        ret = am65_cpsw_nuss_register_devlink(common);
        if (ret)
                return ret;
index 2f6739fe78af2e8e90c0a3b474c2e99c83e02994..6c2835086b57eacbcddb44a3c507e26d5a944427 100644 (file)
@@ -822,7 +822,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
        __be16 sport;
        int err;
 
-       if (!pskb_inet_may_pull(skb))
+       if (!skb_vlan_inet_prepare(skb))
                return -EINVAL;
 
        if (!gs4)
@@ -929,7 +929,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
        __be16 sport;
        int err;
 
-       if (!pskb_inet_may_pull(skb))
+       if (!skb_vlan_inet_prepare(skb))
                return -EINVAL;
 
        if (!gs6)
index a6fcbda64ecc60e5beccf20f2043ab00870cbd5d..2b6ec979a62f2160a7187e024a4b0dc6bf9e08da 100644 (file)
@@ -154,8 +154,11 @@ static void free_netvsc_device(struct rcu_head *head)
        int i;
 
        kfree(nvdev->extension);
-       vfree(nvdev->recv_buf);
-       vfree(nvdev->send_buf);
+
+       if (!nvdev->recv_buf_gpadl_handle.decrypted)
+               vfree(nvdev->recv_buf);
+       if (!nvdev->send_buf_gpadl_handle.decrypted)
+               vfree(nvdev->send_buf);
        bitmap_free(nvdev->send_section_map);
 
        for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
index 0b3f21cba552f27221a2c7fbe8147feb34e69724..92da8c03d960c9beca5b425a1e3d366f37fa21fc 100644 (file)
@@ -2125,14 +2125,16 @@ static ssize_t tun_put_user(struct tun_struct *tun,
                                            tun_is_little_endian(tun), true,
                                            vlan_hlen)) {
                        struct skb_shared_info *sinfo = skb_shinfo(skb);
-                       pr_err("unexpected GSO type: "
-                              "0x%x, gso_size %d, hdr_len %d\n",
-                              sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
-                              tun16_to_cpu(tun, gso.hdr_len));
-                       print_hex_dump(KERN_ERR, "tun: ",
-                                      DUMP_PREFIX_NONE,
-                                      16, 1, skb->head,
-                                      min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+
+                       if (net_ratelimit()) {
+                               netdev_err(tun->dev, "unexpected GSO type: 0x%x, gso_size %d, hdr_len %d\n",
+                                          sinfo->gso_type, tun16_to_cpu(tun, gso.gso_size),
+                                          tun16_to_cpu(tun, gso.hdr_len));
+                               print_hex_dump(KERN_ERR, "tun: ",
+                                              DUMP_PREFIX_NONE,
+                                              16, 1, skb->head,
+                                              min((int)tun16_to_cpu(tun, gso.hdr_len), 64), true);
+                       }
                        WARN_ON_ONCE(1);
                        return -EINVAL;
                }
index a9c418890a1cacc584c9265f4716fa2e18fe0e4b..752f821a19901f313a1aca51fe332539ce82385b 100644 (file)
@@ -1317,6 +1317,8 @@ static int ax88179_bind(struct usbnet *dev, struct usb_interface *intf)
 
        netif_set_tso_max_size(dev->net, 16384);
 
+       ax88179_reset(dev);
+
        return 0;
 }
 
@@ -1695,7 +1697,6 @@ static const struct driver_info ax88179_info = {
        .unbind = ax88179_unbind,
        .status = ax88179_status,
        .link_reset = ax88179_link_reset,
-       .reset = ax88179_reset,
        .stop = ax88179_stop,
        .flags = FLAG_ETHER | FLAG_FRAMING_AX,
        .rx_fixup = ax88179_rx_fixup,
@@ -1708,7 +1709,6 @@ static const struct driver_info ax88178a_info = {
        .unbind = ax88179_unbind,
        .status = ax88179_status,
        .link_reset = ax88179_link_reset,
-       .reset = ax88179_reset,
        .stop = ax88179_stop,
        .flags = FLAG_ETHER | FLAG_FRAMING_AX,
        .rx_fixup = ax88179_rx_fixup,
index e2e181378f4124c64b1d02bbe910f6209b57a356..edc34402e787f9ff84a345ecb892cc7b720ef312 100644 (file)
@@ -1431,6 +1431,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x2692, 0x9025, 4)},    /* Cellient MPL200 (rebranded Qualcomm 05c6:9025) */
        {QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */
        {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */
+       {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */
 
        /* 4. Gobi 1000 devices */
        {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)},    /* Acer Gobi Modem Device */
index c22d1118a13333702c41d0b11148eb067700965b..115c3c5414f2a7aa4f2d8bae0aae96ab574f3235 100644 (file)
@@ -3807,6 +3807,7 @@ static int virtnet_set_rxfh(struct net_device *dev,
                            struct netlink_ext_ack *extack)
 {
        struct virtnet_info *vi = netdev_priv(dev);
+       bool update = false;
        int i;
 
        if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE &&
@@ -3814,13 +3815,28 @@ static int virtnet_set_rxfh(struct net_device *dev,
                return -EOPNOTSUPP;
 
        if (rxfh->indir) {
+               if (!vi->has_rss)
+                       return -EOPNOTSUPP;
+
                for (i = 0; i < vi->rss_indir_table_size; ++i)
                        vi->ctrl->rss.indirection_table[i] = rxfh->indir[i];
+               update = true;
        }
-       if (rxfh->key)
+
+       if (rxfh->key) {
+               /* If either _F_HASH_REPORT or _F_RSS are negotiated, the
+                * device provides hash calculation capabilities, that is,
+                * hash_key is configured.
+                */
+               if (!vi->has_rss && !vi->has_rss_hash_report)
+                       return -EOPNOTSUPP;
+
                memcpy(vi->ctrl->rss.key, rxfh->key, vi->rss_key_size);
+               update = true;
+       }
 
-       virtnet_commit_rss_command(vi);
+       if (update)
+               virtnet_commit_rss_command(vi);
 
        return 0;
 }
@@ -4729,13 +4745,15 @@ static int virtnet_probe(struct virtio_device *vdev)
        if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT))
                vi->has_rss_hash_report = true;
 
-       if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS))
+       if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) {
                vi->has_rss = true;
 
-       if (vi->has_rss || vi->has_rss_hash_report) {
                vi->rss_indir_table_size =
                        virtio_cread16(vdev, offsetof(struct virtio_net_config,
                                rss_max_indirection_table_length));
+       }
+
+       if (vi->has_rss || vi->has_rss_hash_report) {
                vi->rss_key_size =
                        virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size));
 
index bf4833221816d492d4adca02d508d33a74879a92..eff7f5df08e27fb25909999a89c6742785038b75 100644 (file)
@@ -3765,14 +3765,6 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset);
  */
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CAVIUM, 0xa100, quirk_no_bus_reset);
 
-/*
- * Apparently the LSI / Agere FW643 can't recover after a Secondary Bus
- * Reset and requires a power-off or suspend/resume and rescan.  Prevent
- * use of that reset.
- */
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5900, quirk_no_bus_reset);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATT, 0x5901, quirk_no_bus_reset);
-
 /*
  * Some TI KeyStone C667X devices do not support bus/hot reset.  The PCIESS
  * automatically disables LTSSM when Secondary Bus Reset is received and
index 8ea867c2a01a371a64e1eb10327931861d306dc8..62bc24f6dcc7a82cb11361b133d65bab77d90152 100644 (file)
@@ -263,12 +263,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
        if (!ec_dev)
                return -ENOMEM;
 
-       ret = devm_serdev_device_open(dev, serdev);
-       if (ret) {
-               dev_err(dev, "Unable to open UART device");
-               return ret;
-       }
-
        serdev_device_set_drvdata(serdev, ec_dev);
        init_waitqueue_head(&ec_uart->response.wait_queue);
 
@@ -280,14 +274,6 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
                return ret;
        }
 
-       ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
-       if (ret < 0) {
-               dev_err(dev, "Failed to set up host baud rate (%d)", ret);
-               return ret;
-       }
-
-       serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
-
        /* Initialize ec_dev for cros_ec  */
        ec_dev->phys_name = dev_name(dev);
        ec_dev->dev = dev;
@@ -301,6 +287,20 @@ static int cros_ec_uart_probe(struct serdev_device *serdev)
 
        serdev_device_set_client_ops(serdev, &cros_ec_uart_client_ops);
 
+       ret = devm_serdev_device_open(dev, serdev);
+       if (ret) {
+               dev_err(dev, "Unable to open UART device");
+               return ret;
+       }
+
+       ret = serdev_device_set_baudrate(serdev, ec_uart->baudrate);
+       if (ret < 0) {
+               dev_err(dev, "Failed to set up host baud rate (%d)", ret);
+               return ret;
+       }
+
+       serdev_device_set_flow_control(serdev, ec_uart->flowcontrol);
+
        return cros_ec_register(ec_dev);
 }
 
index ee2e164f86b9c2973e317bfbfe3297571a8cab48..38c932df6446ac5714d225ac0545ff16345a6e27 100644 (file)
@@ -597,6 +597,15 @@ static const struct dmi_system_id acer_quirks[] __initconst = {
                },
                .driver_data = &quirk_acer_predator_v4,
        },
+       {
+               .callback = dmi_matched,
+               .ident = "Acer Predator PH18-71",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Acer"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Predator PH18-71"),
+               },
+               .driver_data = &quirk_acer_predator_v4,
+       },
        {
                .callback = set_force_caps,
                .ident = "Acer Aspire Switch 10E SW3-016",
index b456370166b6bb2158ca0916e0eb9e106f9fd9d7..b4f49720c87f62aa6e8349af12797382f740c2b7 100644 (file)
@@ -208,6 +208,15 @@ static const struct dmi_system_id fwbug_list[] = {
                        DMI_MATCH(DMI_BIOS_VERSION, "03.03"),
                }
        },
+       {
+               .ident = "Framework Laptop 13 (Phoenix)",
+               .driver_data = &quirk_spurious_8042,
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "Laptop 13 (AMD Ryzen 7040Series)"),
+                       DMI_MATCH(DMI_BIOS_VERSION, "03.05"),
+               }
+       },
        {}
 };
 
index 6b26e48ce8ad2a5f4de6e78751ffec8941610336..7d6079b02589cbacbb203bdc42cfac4e42dd601c 100644 (file)
@@ -7,4 +7,4 @@
 obj-$(CONFIG_AMD_PMF) += amd-pmf.o
 amd-pmf-objs := core.o acpi.o sps.o \
                auto-mode.o cnqf.o \
-               tee-if.o spc.o
+               tee-if.o spc.o pmf-quirks.o
index d0cf46e2fc8e8a073149c61c52b27e9cc9051da6..1157ec148880b54ec145a7ed9353a656e36f0b33 100644 (file)
@@ -343,7 +343,10 @@ static int apmf_if_verify_interface(struct amd_pmf_dev *pdev)
        if (err)
                return err;
 
-       pdev->supported_func = output.supported_functions;
+       /* only set if not already set by a quirk */
+       if (!pdev->supported_func)
+               pdev->supported_func = output.supported_functions;
+
        dev_dbg(pdev->dev, "supported functions:0x%x notifications:0x%x version:%u\n",
                output.supported_functions, output.notification_mask, output.version);
 
@@ -437,7 +440,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev)
 
        status = acpi_walk_resources(ahandle, METHOD_NAME__CRS, apmf_walk_resources, pmf_dev);
        if (ACPI_FAILURE(status)) {
-               dev_err(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
+               dev_dbg(pmf_dev->dev, "acpi_walk_resources failed :%d\n", status);
                return -EINVAL;
        }
 
index 5d4f80698a8b8824bdb59b4e5632ca5f05982c48..64e6e34a2a9acd954f4ce9a916f77673193aba06 100644 (file)
@@ -445,6 +445,7 @@ static int amd_pmf_probe(struct platform_device *pdev)
        mutex_init(&dev->lock);
        mutex_init(&dev->update_mutex);
 
+       amd_pmf_quirks_init(dev);
        apmf_acpi_init(dev);
        platform_set_drvdata(pdev, dev);
        amd_pmf_dbgfs_register(dev);
diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c
new file mode 100644 (file)
index 0000000..0b2eb0a
--- /dev/null
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * AMD Platform Management Framework Driver Quirks
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Mario Limonciello <mario.limonciello@amd.com>
+ */
+
+#include <linux/dmi.h>
+
+#include "pmf.h"
+
+struct quirk_entry {
+       u32 supported_func;
+};
+
+static struct quirk_entry quirk_no_sps_bug = {
+       .supported_func = 0x4003,
+};
+
+static const struct dmi_system_id fwbug_list[] = {
+       {
+               .ident = "ROG Zephyrus G14",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "GA403UV"),
+               },
+               .driver_data = &quirk_no_sps_bug,
+       },
+       {}
+};
+
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev)
+{
+       const struct dmi_system_id *dmi_id;
+       struct quirk_entry *quirks;
+
+       dmi_id = dmi_first_match(fwbug_list);
+       if (!dmi_id)
+               return;
+
+       quirks = dmi_id->driver_data;
+       if (quirks->supported_func) {
+               dev->supported_func = quirks->supported_func;
+               pr_info("Using supported funcs quirk to avoid %s platform firmware bug\n",
+                       dmi_id->ident);
+       }
+}
+
index 8c4df5753f40d48fefc05c6373a64d0a00469149..eeedd0c0395a89704ce360a6aff9f827566b17b2 100644 (file)
@@ -720,4 +720,7 @@ int apmf_check_smart_pc(struct amd_pmf_dev *pmf_dev);
 void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
 void amd_pmf_dump_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in);
 
+/* Quirk infrastructure */
+void amd_pmf_quirks_init(struct amd_pmf_dev *dev);
+
 #endif /* PMF_H */
index 7457ca2b27a60b7adadcebb251dba45a0e675e97..c7a8276458640adc888f99fee23fcc10b5ddf2e0 100644 (file)
@@ -49,6 +49,8 @@ static const struct acpi_device_id intel_hid_ids[] = {
        {"INTC1076", 0},
        {"INTC1077", 0},
        {"INTC1078", 0},
+       {"INTC107B", 0},
+       {"INTC10CB", 0},
        {"", 0},
 };
 MODULE_DEVICE_TABLE(acpi, intel_hid_ids);
@@ -504,6 +506,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
        struct platform_device *device = context;
        struct intel_hid_priv *priv = dev_get_drvdata(&device->dev);
        unsigned long long ev_index;
+       struct key_entry *ke;
        int err;
 
        /*
@@ -545,11 +548,15 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
                if (event == 0xc0 || !priv->array)
                        return;
 
-               if (!sparse_keymap_entry_from_scancode(priv->array, event)) {
+               ke = sparse_keymap_entry_from_scancode(priv->array, event);
+               if (!ke) {
                        dev_info(&device->dev, "unknown event 0x%x\n", event);
                        return;
                }
 
+               if (ke->type == KE_IGNORE)
+                       return;
+
 wakeup:
                pm_wakeup_hard_event(&device->dev);
 
index 08df9494603c5e2acf152aacfe13fce81a18dc2c..30951f7131cd98bfdaffb70b2aa30ee3ceb7dbdd 100644 (file)
@@ -719,6 +719,7 @@ static struct miscdevice isst_if_char_driver = {
 };
 
 static const struct x86_cpu_id hpm_cpu_ids[] = {
+       X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_D,     NULL),
        X86_MATCH_INTEL_FAM6_MODEL(GRANITERAPIDS_X,     NULL),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_CRESTMONT_X,    NULL),
        {}
index bd75d61ff8a66196d620b5ca2824d8bb16332237..ef730200a04bd94682c781be092a43f15f88190e 100644 (file)
@@ -29,7 +29,7 @@
 #include "uncore-frequency-common.h"
 
 #define        UNCORE_MAJOR_VERSION            0
-#define        UNCORE_MINOR_VERSION            1
+#define        UNCORE_MINOR_VERSION            2
 #define UNCORE_HEADER_INDEX            0
 #define UNCORE_FABRIC_CLUSTER_OFFSET   8
 
@@ -329,7 +329,7 @@ static int uncore_probe(struct auxiliary_device *auxdev, const struct auxiliary_
                        goto remove_clusters;
                }
 
-               if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) != UNCORE_MINOR_VERSION)
+               if (TPMI_MINOR_VERSION(pd_info->ufs_header_ver) > UNCORE_MINOR_VERSION)
                        dev_info(&auxdev->dev, "Uncore: Ignore: Unsupported minor version:%lx\n",
                                 TPMI_MINOR_VERSION(pd_info->ufs_header_ver));
 
index 084c355c86f5fa9050ccb881a7efa6682b538773..79bb2c801daa972a74b96596e7129583c7abb39c 100644 (file)
@@ -136,8 +136,6 @@ static int intel_vbtn_input_setup(struct platform_device *device)
        priv->switches_dev->id.bustype = BUS_HOST;
 
        if (priv->has_switches) {
-               detect_tablet_mode(&device->dev);
-
                ret = input_register_device(priv->switches_dev);
                if (ret)
                        return ret;
@@ -258,9 +256,6 @@ static const struct dmi_system_id dmi_switches_allow_list[] = {
 
 static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
 {
-       unsigned long long vgbs;
-       acpi_status status;
-
        /* See dual_accel_detect.h for more info */
        if (dual_accel)
                return false;
@@ -268,8 +263,7 @@ static bool intel_vbtn_has_switches(acpi_handle handle, bool dual_accel)
        if (!dmi_check_system(dmi_switches_allow_list))
                return false;
 
-       status = acpi_evaluate_integer(handle, "VGBS", NULL, &vgbs);
-       return ACPI_SUCCESS(status);
+       return acpi_has_method(handle, "VGBS");
 }
 
 static int intel_vbtn_probe(struct platform_device *device)
@@ -316,6 +310,9 @@ static int intel_vbtn_probe(struct platform_device *device)
                if (ACPI_FAILURE(status))
                        dev_err(&device->dev, "Error VBDL failed with ACPI status %d\n", status);
        }
+       // Check switches after buttons since VBDL may have side effects.
+       if (has_switches)
+               detect_tablet_mode(&device->dev);
 
        device_init_wakeup(&device->dev, true);
        /*
index ad3c39e9e9f586d301abd572c83e76d554a5c382..e714ee6298dda8a66637aa918e33c861508ce15e 100644 (file)
@@ -736,7 +736,7 @@ static int acpi_add(struct acpi_device *device)
                default:
                        year = 2019;
                }
-       pr_info("product: %s  year: %d\n", product, year);
+       pr_info("product: %s  year: %d\n", product ?: "unknown", year);
 
        if (year >= 2019)
                battery_limit_use_wmbb = 1;
index 291f14ef67024a35befa2ab2418e69b8c94c8302..77244c9aa60d233dd35316d764158ab6dcc378ae 100644 (file)
@@ -264,6 +264,7 @@ static const struct key_entry toshiba_acpi_keymap[] = {
        { KE_KEY, 0xb32, { KEY_NEXTSONG } },
        { KE_KEY, 0xb33, { KEY_PLAYPAUSE } },
        { KE_KEY, 0xb5a, { KEY_MEDIA } },
+       { KE_IGNORE, 0x0e00, { KEY_RESERVED } }, /* Wake from sleep */
        { KE_IGNORE, 0x1430, { KEY_RESERVED } }, /* Wake from sleep */
        { KE_IGNORE, 0x1501, { KEY_RESERVED } }, /* Output changed */
        { KE_IGNORE, 0x1502, { KEY_RESERVED } }, /* HDMI plugged/unplugged */
@@ -3523,9 +3524,10 @@ static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
                                        (dev->kbd_mode == SCI_KBD_MODE_ON) ?
                                        LED_FULL : LED_OFF);
                break;
+       case 0x8e: /* Power button pressed */
+               break;
        case 0x85: /* Unknown */
        case 0x8d: /* Unknown */
-       case 0x8e: /* Unknown */
        case 0x94: /* Unknown */
        case 0x95: /* Unknown */
        default:
index 043736972cb9216c59a7cb3bc6682e056cdb2373..c8425493b95d855a7562406501b7c803ef481b22 100644 (file)
@@ -172,7 +172,6 @@ struct pwm_chip *dwc_pwm_alloc(struct device *dev)
        dwc->clk_ns = 10;
        chip->ops = &dwc_pwm_ops;
 
-       dev_set_drvdata(dev, chip);
        return chip;
 }
 EXPORT_SYMBOL_GPL(dwc_pwm_alloc);
index 676eaf8d7a53f76672527c1871a306cbcdb9b7ba..fb3eadf6fbc464773b17c30235c51f5a4ff6917f 100644 (file)
@@ -31,26 +31,34 @@ static const struct dwc_pwm_info ehl_pwm_info = {
        .size = 0x1000,
 };
 
-static int dwc_pwm_init_one(struct device *dev, void __iomem *base, unsigned int offset)
+static int dwc_pwm_init_one(struct device *dev, struct dwc_pwm_drvdata *ddata, unsigned int idx)
 {
        struct pwm_chip *chip;
        struct dwc_pwm *dwc;
+       int ret;
 
        chip = dwc_pwm_alloc(dev);
        if (IS_ERR(chip))
                return PTR_ERR(chip);
 
        dwc = to_dwc_pwm(chip);
-       dwc->base = base + offset;
+       dwc->base = ddata->io_base + (ddata->info->size * idx);
 
-       return devm_pwmchip_add(dev, chip);
+       ret = devm_pwmchip_add(dev, chip);
+       if (ret)
+               return ret;
+
+       ddata->chips[idx] = chip;
+       return 0;
 }
 
 static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
 {
        const struct dwc_pwm_info *info;
        struct device *dev = &pci->dev;
-       int i, ret;
+       struct dwc_pwm_drvdata *ddata;
+       unsigned int idx;
+       int ret;
 
        ret = pcim_enable_device(pci);
        if (ret)
@@ -63,17 +71,25 @@ static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
                return dev_err_probe(dev, ret, "Failed to iomap PCI BAR\n");
 
        info = (const struct dwc_pwm_info *)id->driver_data;
-
-       for (i = 0; i < info->nr; i++) {
-               /*
-                * No need to check for pcim_iomap_table() failure,
-                * pcim_iomap_regions() already does it for us.
-                */
-               ret = dwc_pwm_init_one(dev, pcim_iomap_table(pci)[0], i * info->size);
+       ddata = devm_kzalloc(dev, struct_size(ddata, chips, info->nr), GFP_KERNEL);
+       if (!ddata)
+               return -ENOMEM;
+
+       /*
+        * No need to check for pcim_iomap_table() failure,
+        * pcim_iomap_regions() already does it for us.
+        */
+       ddata->io_base = pcim_iomap_table(pci)[0];
+       ddata->info = info;
+
+       for (idx = 0; idx < ddata->info->nr; idx++) {
+               ret = dwc_pwm_init_one(dev, ddata, idx);
                if (ret)
                        return ret;
        }
 
+       dev_set_drvdata(dev, ddata);
+
        pm_runtime_put(dev);
        pm_runtime_allow(dev);
 
@@ -88,19 +104,24 @@ static void dwc_pwm_remove(struct pci_dev *pci)
 
 static int dwc_pwm_suspend(struct device *dev)
 {
-       struct pwm_chip *chip = dev_get_drvdata(dev);
-       struct dwc_pwm *dwc = to_dwc_pwm(chip);
-       int i;
-
-       for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
-               if (chip->pwms[i].state.enabled) {
-                       dev_err(dev, "PWM %u in use by consumer (%s)\n",
-                               i, chip->pwms[i].label);
-                       return -EBUSY;
+       struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+       unsigned int idx;
+
+       for (idx = 0; idx < ddata->info->nr; idx++) {
+               struct pwm_chip *chip = ddata->chips[idx];
+               struct dwc_pwm *dwc = to_dwc_pwm(chip);
+               unsigned int i;
+
+               for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+                       if (chip->pwms[i].state.enabled) {
+                               dev_err(dev, "PWM %u in use by consumer (%s)\n",
+                                       i, chip->pwms[i].label);
+                               return -EBUSY;
+                       }
+                       dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
+                       dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
+                       dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
                }
-               dwc->ctx[i].cnt = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(i));
-               dwc->ctx[i].cnt2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(i));
-               dwc->ctx[i].ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(i));
        }
 
        return 0;
@@ -108,14 +129,19 @@ static int dwc_pwm_suspend(struct device *dev)
 
 static int dwc_pwm_resume(struct device *dev)
 {
-       struct pwm_chip *chip = dev_get_drvdata(dev);
-       struct dwc_pwm *dwc = to_dwc_pwm(chip);
-       int i;
-
-       for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
-               dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
-               dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
-               dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+       struct dwc_pwm_drvdata *ddata = dev_get_drvdata(dev);
+       unsigned int idx;
+
+       for (idx = 0; idx < ddata->info->nr; idx++) {
+               struct pwm_chip *chip = ddata->chips[idx];
+               struct dwc_pwm *dwc = to_dwc_pwm(chip);
+               unsigned int i;
+
+               for (i = 0; i < DWC_TIMERS_TOTAL; i++) {
+                       dwc_pwm_writel(dwc, dwc->ctx[i].cnt, DWC_TIM_LD_CNT(i));
+                       dwc_pwm_writel(dwc, dwc->ctx[i].cnt2, DWC_TIM_LD_CNT2(i));
+                       dwc_pwm_writel(dwc, dwc->ctx[i].ctrl, DWC_TIM_CTRL(i));
+               }
        }
 
        return 0;
index a8b074841ae8054a5a3737127442a1d0e9979e02..c6e2df5a61227131c50fc3c6351326217371c3a3 100644 (file)
@@ -38,6 +38,12 @@ struct dwc_pwm_info {
        unsigned int size;
 };
 
+struct dwc_pwm_drvdata {
+       const struct dwc_pwm_info *info;
+       void __iomem *io_base;
+       struct pwm_chip *chips[];
+};
+
 struct dwc_pwm_ctx {
        u32 cnt;
        u32 cnt2;
index 2c8e964425dc38ca80fa5009b17b4e9dc29bbf10..43778b088ffac54c4a8911f3e41e187f0ae3f364 100644 (file)
@@ -292,13 +292,16 @@ out:
 static void ism_free_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
 {
        clear_bit(dmb->sba_idx, ism->sba_bitmap);
-       dma_free_coherent(&ism->pdev->dev, dmb->dmb_len,
-                         dmb->cpu_addr, dmb->dma_addr);
+       dma_unmap_page(&ism->pdev->dev, dmb->dma_addr, dmb->dmb_len,
+                      DMA_FROM_DEVICE);
+       folio_put(virt_to_folio(dmb->cpu_addr));
 }
 
 static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
 {
+       struct folio *folio;
        unsigned long bit;
+       int rc;
 
        if (PAGE_ALIGN(dmb->dmb_len) > dma_get_max_seg_size(&ism->pdev->dev))
                return -EINVAL;
@@ -315,14 +318,30 @@ static int ism_alloc_dmb(struct ism_dev *ism, struct ism_dmb *dmb)
            test_and_set_bit(dmb->sba_idx, ism->sba_bitmap))
                return -EINVAL;
 
-       dmb->cpu_addr = dma_alloc_coherent(&ism->pdev->dev, dmb->dmb_len,
-                                          &dmb->dma_addr,
-                                          GFP_KERNEL | __GFP_NOWARN |
-                                          __GFP_NOMEMALLOC | __GFP_NORETRY);
-       if (!dmb->cpu_addr)
-               clear_bit(dmb->sba_idx, ism->sba_bitmap);
+       folio = folio_alloc(GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC |
+                           __GFP_NORETRY, get_order(dmb->dmb_len));
 
-       return dmb->cpu_addr ? 0 : -ENOMEM;
+       if (!folio) {
+               rc = -ENOMEM;
+               goto out_bit;
+       }
+
+       dmb->cpu_addr = folio_address(folio);
+       dmb->dma_addr = dma_map_page(&ism->pdev->dev,
+                                    virt_to_page(dmb->cpu_addr), 0,
+                                    dmb->dmb_len, DMA_FROM_DEVICE);
+       if (dma_mapping_error(&ism->pdev->dev, dmb->dma_addr)) {
+               rc = -ENOMEM;
+               goto out_free;
+       }
+
+       return 0;
+
+out_free:
+       kfree(dmb->cpu_addr);
+out_bit:
+       clear_bit(dmb->sba_idx, ism->sba_bitmap);
+       return rc;
 }
 
 int ism_register_dmb(struct ism_dev *ism, struct ism_dmb *dmb,
index 097dfe4b620dce85736b8a0d5cf7f4b3c4842e9b..35f8e00850d6cb3e45063c2229c4f7532a9eae40 100644 (file)
@@ -1797,7 +1797,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
        if (dev_is_sata(device)) {
                struct ata_link *link = &device->sata_dev.ap->link;
 
-               rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT,
+               rc = ata_wait_after_reset(link, jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT,
                                          smp_ata_check_ready_type);
        } else {
                msleep(2000);
index 7d2a33514538c2cd8083733d8303f4dc5934de7d..34f96cc35342bcb4ad2e4208d69b19073e6a9bb2 100644 (file)
@@ -2244,7 +2244,15 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
        case SAS_PROTOCOL_SATA | SAS_PROTOCOL_STP:
                if ((dw0 & CMPLT_HDR_RSPNS_XFRD_MSK) &&
                    (sipc_rx_err_type & RX_FIS_STATUS_ERR_MSK)) {
-                       ts->stat = SAS_PROTO_RESPONSE;
+                       if (task->ata_task.use_ncq) {
+                               struct domain_device *device = task->dev;
+                               struct hisi_sas_device *sas_dev = device->lldd_dev;
+
+                               sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR;
+                               slot->abort = 1;
+                       } else {
+                               ts->stat = SAS_PROTO_RESPONSE;
+                       }
                } else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
                        ts->residual = trans_tx_fail_type;
                        ts->stat = SAS_DATA_UNDERRUN;
index 26e6b3e3af4317ca088941bc5bab37c15aebfd32..dcde55c8ee5deadd421b108087605c5c822c3b4c 100644 (file)
@@ -1100,7 +1100,7 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job)
 
                list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) {
                        if (fcport->edif.enable) {
-                               if (pcnt > app_req.num_ports)
+                               if (pcnt >= app_req.num_ports)
                                        break;
 
                                app_reply->elem[pcnt].rekey_count =
index 2e28e2360c85740d0b3ebb391785ee111c78d47b..5b3230ef51fe61bce58ba1cc83bff7cb0a6ddbc1 100644 (file)
@@ -635,10 +635,9 @@ static bool scsi_end_request(struct request *req, blk_status_t error,
        if (blk_queue_add_random(q))
                add_disk_randomness(req->q->disk);
 
-       if (!blk_rq_is_passthrough(req)) {
-               WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED));
-               cmd->flags &= ~SCMD_INITIALIZED;
-       }
+       WARN_ON_ONCE(!blk_rq_is_passthrough(req) &&
+                    !(cmd->flags & SCMD_INITIALIZED));
+       cmd->flags = 0;
 
        /*
         * Calling rcu_barrier() is not necessary here because the
index 386981c6976a53d668632457a47fcf1db609f5fd..baf870a03ecf6c6516f90e599188c659dc986bae 100644 (file)
@@ -285,6 +285,7 @@ sg_open(struct inode *inode, struct file *filp)
        int dev = iminor(inode);
        int flags = filp->f_flags;
        struct request_queue *q;
+       struct scsi_device *device;
        Sg_device *sdp;
        Sg_fd *sfp;
        int retval;
@@ -301,11 +302,12 @@ sg_open(struct inode *inode, struct file *filp)
 
        /* This driver's module count bumped by fops_get in <linux/fs.h> */
        /* Prevent the device driver from vanishing while we sleep */
-       retval = scsi_device_get(sdp->device);
+       device = sdp->device;
+       retval = scsi_device_get(device);
        if (retval)
                goto sg_put;
 
-       retval = scsi_autopm_get_device(sdp->device);
+       retval = scsi_autopm_get_device(device);
        if (retval)
                goto sdp_put;
 
@@ -313,7 +315,7 @@ sg_open(struct inode *inode, struct file *filp)
         * check if O_NONBLOCK. Permits SCSI commands to be issued
         * during error recovery. Tread carefully. */
        if (!((flags & O_NONBLOCK) ||
-             scsi_block_when_processing_errors(sdp->device))) {
+             scsi_block_when_processing_errors(device))) {
                retval = -ENXIO;
                /* we are in error recovery for this device */
                goto error_out;
@@ -344,7 +346,7 @@ sg_open(struct inode *inode, struct file *filp)
 
        if (sdp->open_cnt < 1) {  /* no existing opens */
                sdp->sgdebug = 0;
-               q = sdp->device->request_queue;
+               q = device->request_queue;
                sdp->sg_tablesize = queue_max_segments(q);
        }
        sfp = sg_add_sfp(sdp);
@@ -370,10 +372,11 @@ out_undo:
 error_mutex_locked:
        mutex_unlock(&sdp->open_rel_lock);
 error_out:
-       scsi_autopm_put_device(sdp->device);
+       scsi_autopm_put_device(device);
 sdp_put:
-       scsi_device_put(sdp->device);
-       goto sg_put;
+       kref_put(&sdp->d_ref, sg_device_destroy);
+       scsi_device_put(device);
+       return retval;
 }
 
 /* Release resources associated with a successful sg_open()
@@ -2233,7 +2236,6 @@ sg_remove_sfp_usercontext(struct work_struct *work)
                        "sg_remove_sfp: sfp=0x%p\n", sfp));
        kfree(sfp);
 
-       WARN_ON_ONCE(kref_read(&sdp->d_ref) != 1);
        kref_put(&sdp->d_ref, sg_device_destroy);
        scsi_device_put(device);
        module_put(THIS_MODULE);
index c1fbcdd1618264f0cd09f5e4078ac600ad6dc22a..c40217f44b1bc53d149e8d5ea12c0e5297373800 100644 (file)
@@ -3672,6 +3672,8 @@ static int __init target_core_init_configfs(void)
 {
        struct configfs_subsystem *subsys = &target_core_fabrics;
        struct t10_alua_lu_gp *lu_gp;
+       struct cred *kern_cred;
+       const struct cred *old_cred;
        int ret;
 
        pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage"
@@ -3748,11 +3750,21 @@ static int __init target_core_init_configfs(void)
        if (ret < 0)
                goto out;
 
+       /* We use the kernel credentials to access the target directory */
+       kern_cred = prepare_kernel_cred(&init_task);
+       if (!kern_cred) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       old_cred = override_creds(kern_cred);
        target_init_dbroot();
+       revert_creds(old_cred);
+       put_cred(kern_cred);
 
        return 0;
 
 out:
+       target_xcopy_release_pt();
        configfs_unregister_subsystem(subsys);
        core_dev_release_virtual_lun0();
        rd_module_exit();
index c617e8b9f0ddfe18bcb34155e156af47e4006837..d78d54ae2605e8ab3050dd7a1e68fb13688a78c5 100644 (file)
@@ -616,6 +616,7 @@ void thermal_debug_tz_trip_up(struct thermal_zone_device *tz,
        tze->trip_stats[trip_id].timestamp = now;
        tze->trip_stats[trip_id].max = max(tze->trip_stats[trip_id].max, temperature);
        tze->trip_stats[trip_id].min = min(tze->trip_stats[trip_id].min, temperature);
+       tze->trip_stats[trip_id].count++;
        tze->trip_stats[trip_id].avg = tze->trip_stats[trip_id].avg +
                (temperature - tze->trip_stats[trip_id].avg) /
                tze->trip_stats[trip_id].count;
index 06859e17b67b7777a08d7e5e33b1bfb972cdd6c3..7a00004bfd0361799f1a43be4cb8e9c35e414d9e 100644 (file)
@@ -47,7 +47,7 @@ enum {
        TSTBUS_MAX,
 };
 
-#define QCOM_UFS_MAX_GEAR 4
+#define QCOM_UFS_MAX_GEAR 5
 #define QCOM_UFS_MAX_LANE 2
 
 enum {
@@ -67,26 +67,32 @@ static const struct __ufs_qcom_bw_table {
        [MODE_PWM][UFS_PWM_G2][UFS_LANE_1] = { 1844,            1000 },
        [MODE_PWM][UFS_PWM_G3][UFS_LANE_1] = { 3688,            1000 },
        [MODE_PWM][UFS_PWM_G4][UFS_LANE_1] = { 7376,            1000 },
+       [MODE_PWM][UFS_PWM_G5][UFS_LANE_1] = { 14752,           1000 },
        [MODE_PWM][UFS_PWM_G1][UFS_LANE_2] = { 1844,            1000 },
        [MODE_PWM][UFS_PWM_G2][UFS_LANE_2] = { 3688,            1000 },
        [MODE_PWM][UFS_PWM_G3][UFS_LANE_2] = { 7376,            1000 },
        [MODE_PWM][UFS_PWM_G4][UFS_LANE_2] = { 14752,           1000 },
+       [MODE_PWM][UFS_PWM_G5][UFS_LANE_2] = { 29504,           1000 },
        [MODE_HS_RA][UFS_HS_G1][UFS_LANE_1] = { 127796,         1000 },
        [MODE_HS_RA][UFS_HS_G2][UFS_LANE_1] = { 255591,         1000 },
        [MODE_HS_RA][UFS_HS_G3][UFS_LANE_1] = { 1492582,        102400 },
        [MODE_HS_RA][UFS_HS_G4][UFS_LANE_1] = { 2915200,        204800 },
+       [MODE_HS_RA][UFS_HS_G5][UFS_LANE_1] = { 5836800,        409600 },
        [MODE_HS_RA][UFS_HS_G1][UFS_LANE_2] = { 255591,         1000 },
        [MODE_HS_RA][UFS_HS_G2][UFS_LANE_2] = { 511181,         1000 },
        [MODE_HS_RA][UFS_HS_G3][UFS_LANE_2] = { 1492582,        204800 },
        [MODE_HS_RA][UFS_HS_G4][UFS_LANE_2] = { 2915200,        409600 },
+       [MODE_HS_RA][UFS_HS_G5][UFS_LANE_2] = { 5836800,        819200 },
        [MODE_HS_RB][UFS_HS_G1][UFS_LANE_1] = { 149422,         1000 },
        [MODE_HS_RB][UFS_HS_G2][UFS_LANE_1] = { 298189,         1000 },
        [MODE_HS_RB][UFS_HS_G3][UFS_LANE_1] = { 1492582,        102400 },
        [MODE_HS_RB][UFS_HS_G4][UFS_LANE_1] = { 2915200,        204800 },
+       [MODE_HS_RB][UFS_HS_G5][UFS_LANE_1] = { 5836800,        409600 },
        [MODE_HS_RB][UFS_HS_G1][UFS_LANE_2] = { 298189,         1000 },
        [MODE_HS_RB][UFS_HS_G2][UFS_LANE_2] = { 596378,         1000 },
        [MODE_HS_RB][UFS_HS_G3][UFS_LANE_2] = { 1492582,        204800 },
        [MODE_HS_RB][UFS_HS_G4][UFS_LANE_2] = { 2915200,        409600 },
+       [MODE_HS_RB][UFS_HS_G5][UFS_LANE_2] = { 5836800,        819200 },
        [MODE_MAX][0][0]                    = { 7643136,        307200 },
 };
 
index 20d9762331bd767aa88c7b04d3f4c2e84ff72648..6be3462b109ff29c0d5448a7d3b8f31e068f6adb 100644 (file)
@@ -181,12 +181,14 @@ hv_uio_cleanup(struct hv_device *dev, struct hv_uio_private_data *pdata)
 {
        if (pdata->send_gpadl.gpadl_handle) {
                vmbus_teardown_gpadl(dev->channel, &pdata->send_gpadl);
-               vfree(pdata->send_buf);
+               if (!pdata->send_gpadl.decrypted)
+                       vfree(pdata->send_buf);
        }
 
        if (pdata->recv_gpadl.gpadl_handle) {
                vmbus_teardown_gpadl(dev->channel, &pdata->recv_gpadl);
-               vfree(pdata->recv_buf);
+               if (!pdata->recv_gpadl.decrypted)
+                       vfree(pdata->recv_buf);
        }
 }
 
@@ -295,7 +297,8 @@ hv_uio_probe(struct hv_device *dev,
        ret = vmbus_establish_gpadl(channel, pdata->recv_buf,
                                    RECV_BUFFER_SIZE, &pdata->recv_gpadl);
        if (ret) {
-               vfree(pdata->recv_buf);
+               if (!pdata->recv_gpadl.decrypted)
+                       vfree(pdata->recv_buf);
                goto fail_close;
        }
 
@@ -317,7 +320,8 @@ hv_uio_probe(struct hv_device *dev,
        ret = vmbus_establish_gpadl(channel, pdata->send_buf,
                                    SEND_BUFFER_SIZE, &pdata->send_gpadl);
        if (ret) {
-               vfree(pdata->send_buf);
+               if (!pdata->send_gpadl.decrypted)
+                       vfree(pdata->send_buf);
                goto fail_close;
        }
 
index 045f666b4f12a2a6416c93dafc2189af03662668..8995730ce0bfc82d193bd7128e51817fba43de76 100644 (file)
@@ -2515,7 +2515,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq,
                vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
 
                if (unlikely((u16)(vq->avail_idx - last_avail_idx) > vq->num)) {
-                       vq_err(vq, "Guest moved used index from %u to %u",
+                       vq_err(vq, "Guest moved avail index from %u to %u",
                                last_avail_idx, vq->avail_idx);
                        return -EFAULT;
                }
@@ -2799,9 +2799,19 @@ bool vhost_vq_avail_empty(struct vhost_dev *dev, struct vhost_virtqueue *vq)
        r = vhost_get_avail_idx(vq, &avail_idx);
        if (unlikely(r))
                return false;
+
        vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+       if (vq->avail_idx != vq->last_avail_idx) {
+               /* Since we have updated avail_idx, the following
+                * call to vhost_get_vq_desc() will read available
+                * ring entries. Make sure that read happens after
+                * the avail_idx read.
+                */
+               smp_rmb();
+               return false;
+       }
 
-       return vq->avail_idx == vq->last_avail_idx;
+       return true;
 }
 EXPORT_SYMBOL_GPL(vhost_vq_avail_empty);
 
@@ -2838,9 +2848,19 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
                       &vq->avail->idx, r);
                return false;
        }
+
        vq->avail_idx = vhost16_to_cpu(vq, avail_idx);
+       if (vq->avail_idx != vq->last_avail_idx) {
+               /* Since we have updated avail_idx, the following
+                * call to vhost_get_vq_desc() will read available
+                * ring entries. Make sure that read happens after
+                * the avail_idx read.
+                */
+               smp_rmb();
+               return true;
+       }
 
-       return vq->avail_idx != vq->last_avail_idx;
+       return false;
 }
 EXPORT_SYMBOL_GPL(vhost_enable_notify);
 
index b67a28da47026d0299b8a1f8c22a40fc36b1c4a2..a1c467a0e9f719665fc02fa559d5c94545e5725f 100644 (file)
@@ -68,7 +68,6 @@ out:
 static void vmgenid_notify(struct acpi_device *device, u32 event)
 {
        struct vmgenid_state *state = acpi_driver_data(device);
-       char *envp[] = { "NEW_VMGENID=1", NULL };
        u8 old_id[VMGENID_SIZE];
 
        memcpy(old_id, state->this_id, sizeof(old_id));
@@ -76,7 +75,6 @@ static void vmgenid_notify(struct acpi_device *device, u32 event)
        if (!memcmp(old_id, state->this_id, sizeof(old_id)))
                return;
        add_vmfork_randomness(state->this_id, sizeof(state->this_id));
-       kobject_uevent_env(&device->dev.kobj, KOBJ_CHANGE, envp);
 }
 
 static const struct acpi_device_id vmgenid_ids[] = {
index f173587893cb34cadbfb4c6e548c158522c7749d..9510c551dce864d1e7df97f47a0c24fbcb8b8478 100644 (file)
@@ -362,14 +362,16 @@ static const struct bus_type virtio_bus = {
        .remove = virtio_dev_remove,
 };
 
-int register_virtio_driver(struct virtio_driver *driver)
+int __register_virtio_driver(struct virtio_driver *driver, struct module *owner)
 {
        /* Catch this early. */
        BUG_ON(driver->feature_table_size && !driver->feature_table);
        driver->driver.bus = &virtio_bus;
+       driver->driver.owner = owner;
+
        return driver_register(&driver->driver);
 }
-EXPORT_SYMBOL_GPL(register_virtio_driver);
+EXPORT_SYMBOL_GPL(__register_virtio_driver);
 
 void unregister_virtio_driver(struct virtio_driver *driver)
 {
index 3640f417cce118b06e43ae4c8b38bb275b0097fc..5c180fdc3efbdf09791c7941f3f1522cd6d6f9dc 100644 (file)
@@ -281,7 +281,6 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
        struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
        struct btree_trans *trans = bch2_trans_get(c);
        struct btree_iter iter = { NULL };
-       struct bkey_s_c_xattr xattr;
        struct posix_acl *acl = NULL;
        struct bkey_s_c k;
        int ret;
@@ -290,28 +289,27 @@ retry:
 
        ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
                        &hash, inode_inum(inode), &search, 0);
-       if (ret) {
-               if (!bch2_err_matches(ret, ENOENT))
-                       acl = ERR_PTR(ret);
-               goto out;
-       }
+       if (ret)
+               goto err;
 
        k = bch2_btree_iter_peek_slot(&iter);
        ret = bkey_err(k);
-       if (ret) {
-               acl = ERR_PTR(ret);
-               goto out;
-       }
+       if (ret)
+               goto err;
 
-       xattr = bkey_s_c_to_xattr(k);
+       struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
        acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
-                       le16_to_cpu(xattr.v->x_val_len));
+                                le16_to_cpu(xattr.v->x_val_len));
+       ret = PTR_ERR_OR_ZERO(acl);
+err:
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               goto retry;
 
-       if (!IS_ERR(acl))
+       if (ret)
+               acl = !bch2_err_matches(ret, ENOENT) ? ERR_PTR(ret) : NULL;
+
+       if (!IS_ERR_OR_NULL(acl))
                set_cached_acl(&inode->v, type, acl);
-out:
-       if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
-               goto retry;
 
        bch2_trans_iter_exit(trans, &iter);
        bch2_trans_put(trans);
index 114328acde7202ed201fc8e776ed9cd73176d765..fadb1078903d291ce6ce3c7928d79c71c1eb18f8 100644 (file)
@@ -49,13 +49,15 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
        if (!bch2_dev_exists2(c, bp.k->p.inode))
                return 0;
 
+       struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode);
        struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
        int ret = 0;
 
-       bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
+       bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
+                        !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)),
                         c, err,
-                        backpointer_pos_wrong,
-                        "backpointer at wrong pos");
+                        backpointer_bucket_offset_wrong,
+                        "backpointer bucket_offset wrong");
 fsck_err:
        return ret;
 }
index da012ca7daee5501fe04be48bc875c918abbb33a..85949b9fd880ce2fcce508ba4018350a5dfac9ca 100644 (file)
@@ -53,14 +53,11 @@ static inline struct bpos bucket_pos_to_bp(const struct bch_fs *c,
                                           u64 bucket_offset)
 {
        struct bch_dev *ca = bch_dev_bkey_exists(c, bucket.inode);
-       struct bpos ret;
-
-       ret = POS(bucket.inode,
-                 (bucket_to_sector(ca, bucket.offset) <<
-                  MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
+       struct bpos ret = POS(bucket.inode,
+                             (bucket_to_sector(ca, bucket.offset) <<
+                              MAX_EXTENT_COMPRESS_RATIO_SHIFT) + bucket_offset);
 
        EBUG_ON(!bkey_eq(bucket, bp_pos_to_bucket(c, ret)));
-
        return ret;
 }
 
index a31a5f706929eb2006e4867a38123b9526639cee..91c3c1fef233d118fb083dae3a60a5e779e3cdaf 100644 (file)
@@ -709,6 +709,8 @@ struct btree_trans_buf {
        x(stripe_delete)                                                \
        x(reflink)                                                      \
        x(fallocate)                                                    \
+       x(fsync)                                                        \
+       x(dio_write)                                                    \
        x(discard)                                                      \
        x(discard_fast)                                                 \
        x(invalidate)                                                   \
index 63102992d9556d1b33b445a3116df61964a6ca01..085987435a5ea3cfc7354db7ee5392ce62241f05 100644 (file)
@@ -578,7 +578,8 @@ struct bch_member {
        __le64                  nbuckets;       /* device size */
        __le16                  first_bucket;   /* index of first bucket used */
        __le16                  bucket_size;    /* sectors */
-       __le32                  pad;
+       __u8                    btree_bitmap_shift;
+       __u8                    pad[3];
        __le64                  last_mount;     /* time_t */
 
        __le64                  flags;
@@ -587,6 +588,7 @@ struct bch_member {
        __le64                  errors_at_reset[BCH_MEMBER_ERROR_NR];
        __le64                  errors_reset_time;
        __le64                  seq;
+       __le64                  btree_allocated_bitmap;
 };
 
 #define BCH_MEMBER_V1_BYTES    56
@@ -876,7 +878,8 @@ struct bch_sb_field_downgrade {
        x(rebalance_work,               BCH_VERSION(1,  3))             \
        x(member_seq,                   BCH_VERSION(1,  4))             \
        x(subvolume_fs_parent,          BCH_VERSION(1,  5))             \
-       x(btree_subvolume_children,     BCH_VERSION(1,  6))
+       x(btree_subvolume_children,     BCH_VERSION(1,  6))             \
+       x(mi_btree_bitmap,              BCH_VERSION(1,  7))
 
 enum bcachefs_metadata_version {
        bcachefs_metadata_version_min = 9,
@@ -1314,7 +1317,7 @@ static inline __u64 __bset_magic(struct bch_sb *sb)
        x(write_buffer_keys,    11)             \
        x(datetime,             12)
 
-enum {
+enum bch_jset_entry_type {
 #define x(f, nr)       BCH_JSET_ENTRY_##f      = nr,
        BCH_JSET_ENTRY_TYPES()
 #undef x
@@ -1360,7 +1363,7 @@ struct jset_entry_blacklist_v2 {
        x(inodes,               1)              \
        x(key_version,          2)
 
-enum {
+enum bch_fs_usage_type {
 #define x(f, nr)       BCH_FS_USAGE_##f        = nr,
        BCH_FS_USAGE_TYPES()
 #undef x
@@ -1535,6 +1538,20 @@ enum btree_id {
        BTREE_ID_NR
 };
 
+static inline bool btree_id_is_alloc(enum btree_id id)
+{
+       switch (id) {
+       case BTREE_ID_alloc:
+       case BTREE_ID_backpointers:
+       case BTREE_ID_need_discard:
+       case BTREE_ID_freespace:
+       case BTREE_ID_bucket_gens:
+               return true;
+       default:
+               return false;
+       }
+}
+
 #define BTREE_MAX_DEPTH                4U
 
 /* Btree nodes */
index cf23ff47bed8be588593a7fb193ee21ca8298c65..3a45d128f608db86d060d43573219d60762e3038 100644 (file)
@@ -314,6 +314,12 @@ static inline unsigned bkeyp_key_u64s(const struct bkey_format *format,
        return bkey_packed(k) ? format->key_u64s : BKEY_U64s;
 }
 
+static inline bool bkeyp_u64s_valid(const struct bkey_format *f,
+                                   const struct bkey_packed *k)
+{
+       return ((unsigned) k->u64s - bkeyp_key_u64s(f, k) <= U8_MAX - BKEY_U64s);
+}
+
 static inline unsigned bkeyp_key_bytes(const struct bkey_format *format,
                                       const struct bkey_packed *k)
 {
index 5e52684764eb14de4d8433abd5954a829648440b..db336a43fc083a79615e81ce9da37ff4877005f9 100644 (file)
@@ -171,11 +171,15 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
        if (type >= BKEY_TYPE_NR)
                return 0;
 
-       bkey_fsck_err_on((flags & BKEY_INVALID_COMMIT) &&
+       bkey_fsck_err_on((type == BKEY_TYPE_btree ||
+                         (flags & BKEY_INVALID_COMMIT)) &&
                         !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err,
                         bkey_invalid_type_for_btree,
                         "invalid key type for btree %s (%s)",
-                        bch2_btree_node_type_str(type), bch2_bkey_types[k.k->type]);
+                        bch2_btree_node_type_str(type),
+                        k.k->type < KEY_TYPE_MAX
+                        ? bch2_bkey_types[k.k->type]
+                        : "(unknown)");
 
        if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) {
                bkey_fsck_err_on(k.k->size == 0, c, err,
index 84474324dba9b508141f0e886bafbd8a95d47537..02c70e813face0ce975f1f700e55a34743d286ea 100644 (file)
@@ -709,9 +709,31 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
-       u32 seq;
 
-       BUG_ON(level + 1 >= BTREE_MAX_DEPTH);
+       if (unlikely(level >= BTREE_MAX_DEPTH)) {
+               int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u",
+                                                level, BTREE_MAX_DEPTH);
+               return ERR_PTR(ret);
+       }
+
+       if (unlikely(!bkey_is_btree_ptr(&k->k))) {
+               struct printbuf buf = PRINTBUF;
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+               int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf);
+               printbuf_exit(&buf);
+               return ERR_PTR(ret);
+       }
+
+       if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) {
+               struct printbuf buf = PRINTBUF;
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+               int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf);
+               printbuf_exit(&buf);
+               return ERR_PTR(ret);
+       }
+
        /*
         * Parent node must be locked, else we could read in a btree node that's
         * been freed:
@@ -752,34 +774,26 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans,
        }
 
        set_btree_node_read_in_flight(b);
-
        six_unlock_write(&b->c.lock);
-       seq = six_lock_seq(&b->c.lock);
-       six_unlock_intent(&b->c.lock);
 
-       /* Unlock before doing IO: */
-       if (path && sync)
-               bch2_trans_unlock_noassert(trans);
-
-       bch2_btree_node_read(trans, b, sync);
+       if (path) {
+               u32 seq = six_lock_seq(&b->c.lock);
 
-       if (!sync)
-               return NULL;
+               /* Unlock before doing IO: */
+               six_unlock_intent(&b->c.lock);
+               bch2_trans_unlock_noassert(trans);
 
-       if (path) {
-               int ret = bch2_trans_relock(trans) ?:
-                       bch2_btree_path_relock_intent(trans, path);
-               if (ret) {
-                       BUG_ON(!trans->restarted);
-                       return ERR_PTR(ret);
-               }
-       }
+               bch2_btree_node_read(trans, b, sync);
 
-       if (!six_relock_type(&b->c.lock, lock_type, seq)) {
-               BUG_ON(!path);
+               if (!sync)
+                       return NULL;
 
-               trace_and_count(c, trans_restart_relock_after_fill, trans, _THIS_IP_, path);
-               return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_relock_after_fill));
+               if (!six_relock_type(&b->c.lock, lock_type, seq))
+                       b = NULL;
+       } else {
+               bch2_btree_node_read(trans, b, sync);
+               if (lock_type == SIX_LOCK_read)
+                       six_lock_downgrade(&b->c.lock);
        }
 
        return b;
@@ -1112,18 +1126,19 @@ int bch2_btree_node_prefetch(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree_cache *bc = &c->btree_cache;
-       struct btree *b;
 
        BUG_ON(path && !btree_node_locked(path, level + 1));
        BUG_ON(level >= BTREE_MAX_DEPTH);
 
-       b = btree_cache_find(bc, k);
+       struct btree *b = btree_cache_find(bc, k);
        if (b)
                return 0;
 
        b = bch2_btree_node_fill(trans, path, k, btree_id,
                                 level, SIX_LOCK_read, false);
-       return PTR_ERR_OR_ZERO(b);
+       if (!IS_ERR_OR_NULL(b))
+               six_unlock_read(&b->c.lock);
+       return bch2_trans_relock(trans) ?: PTR_ERR_OR_ZERO(b);
 }
 
 void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
@@ -1148,6 +1163,8 @@ wait_on_io:
 
        btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent);
        btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write);
+       if (unlikely(b->hash_val != btree_ptr_hash_val(k)))
+               goto out;
 
        if (btree_node_dirty(b)) {
                __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim);
@@ -1162,7 +1179,7 @@ wait_on_io:
        btree_node_data_free(c, b);
        bch2_btree_node_hash_remove(bc, b);
        mutex_unlock(&bc->lock);
-
+out:
        six_unlock_write(&b->c.lock);
        six_unlock_intent(&b->c.lock);
 }
index 6280da1244b55032beaf60c4e2b29df0ff2c3152..ecbd9598f69fd00e86efbe7537a134d6d4c4db06 100644 (file)
@@ -368,11 +368,16 @@ again:
                                buf.buf)) {
                        bch2_btree_node_evict(trans, cur_k.k);
                        cur = NULL;
-                       ret =   bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?:
-                               bch2_journal_key_delete(c, b->c.btree_id,
-                                                       b->c.level, cur_k.k->k.p);
+                       ret = bch2_journal_key_delete(c, b->c.btree_id,
+                                                     b->c.level, cur_k.k->k.p);
                        if (ret)
                                break;
+
+                       if (!btree_id_is_alloc(b->c.btree_id)) {
+                               ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+                               if (ret)
+                                       break;
+                       }
                        continue;
                }
 
@@ -544,12 +549,12 @@ reconstruct_root:
                                bch2_btree_root_alloc_fake(c, i, 0);
                        } else {
                                bch2_btree_root_alloc_fake(c, i, 1);
+                               bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
                                ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
                                if (ret)
                                        break;
                        }
 
-                       bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
                        reconstructed_root = true;
                }
 
@@ -823,6 +828,7 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
        struct bch_fs *c = trans->c;
        struct bkey deleted = KEY(0, 0, 0);
        struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL };
+       struct printbuf buf = PRINTBUF;
        int ret = 0;
 
        deleted.p = k->k->p;
@@ -843,11 +849,23 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
        if (ret)
                goto err;
 
+       if (mustfix_fsck_err_on(level && !bch2_dev_btree_bitmap_marked(c, *k),
+                               c, btree_bitmap_not_marked,
+                               "btree ptr not marked in member info btree allocated bitmap\n  %s",
+                               (bch2_bkey_val_to_text(&buf, c, *k),
+                                buf.buf))) {
+               mutex_lock(&c->sb_lock);
+               bch2_dev_btree_bitmap_mark(c, *k);
+               bch2_write_super(c);
+               mutex_unlock(&c->sb_lock);
+       }
+
        ret = commit_do(trans, NULL, NULL, 0,
                        bch2_key_trigger(trans, btree_id, level, old,
                                         unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
 fsck_err:
 err:
+       printbuf_exit(&buf);
        bch_err_fn(c, ret);
        return ret;
 }
index d7de82ac389354f9a0d5eef0a66c8694f1752b94..9678b2375bedde868e7a168435c9a17fc74eb26a 100644 (file)
@@ -831,7 +831,7 @@ static int bset_key_invalid(struct bch_fs *c, struct btree *b,
                (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0);
 }
 
-static bool __bkey_valid(struct bch_fs *c, struct btree *b,
+static bool bkey_packed_valid(struct bch_fs *c, struct btree *b,
                         struct bset *i, struct bkey_packed *k)
 {
        if (bkey_p_next(k) > vstruct_last(i))
@@ -840,7 +840,7 @@ static bool __bkey_valid(struct bch_fs *c, struct btree *b,
        if (k->format > KEY_FORMAT_CURRENT)
                return false;
 
-       if (k->u64s < bkeyp_key_u64s(&b->format, k))
+       if (!bkeyp_u64s_valid(&b->format, k))
                return false;
 
        struct printbuf buf = PRINTBUF;
@@ -884,11 +884,13 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
                                 "invalid bkey format %u", k->format))
                        goto drop_this_key;
 
-               if (btree_err_on(k->u64s < bkeyp_key_u64s(&b->format, k),
+               if (btree_err_on(!bkeyp_u64s_valid(&b->format, k),
                                 -BCH_ERR_btree_node_read_err_fixable,
                                 c, NULL, b, i,
                                 btree_node_bkey_bad_u64s,
-                                "k->u64s too small (%u < %u)", k->u64s, bkeyp_key_u64s(&b->format, k)))
+                                "bad k->u64s %u (min %u max %lu)", k->u64s,
+                                bkeyp_key_u64s(&b->format, k),
+                                U8_MAX - BKEY_U64s + bkeyp_key_u64s(&b->format, k)))
                        goto drop_this_key;
 
                if (!write)
@@ -947,13 +949,12 @@ drop_this_key:
                         * do
                         */
 
-                       if (!__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
+                       if (!bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key))) {
                                for (next_good_key = 1;
                                     next_good_key < (u64 *) vstruct_last(i) - (u64 *) k;
                                     next_good_key++)
-                                       if (__bkey_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
+                                       if (bkey_packed_valid(c, b, i, (void *) ((u64 *) k + next_good_key)))
                                                goto got_good_key;
-
                        }
 
                        /*
@@ -1339,7 +1340,9 @@ start:
                               rb->start_time);
        bio_put(&rb->bio);
 
-       if (saw_error && !btree_node_read_error(b)) {
+       if (saw_error &&
+           !btree_node_read_error(b) &&
+           c->curr_recovery_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes) {
                printbuf_reset(&buf);
                bch2_bpos_to_text(&buf, b->key.k.p);
                bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
index 24772538e4cc74ada59851bd7847dd5ece5ea122..1c70836dd7cce4988ef8cf166ee0797fd8f8269e 100644 (file)
@@ -498,8 +498,13 @@ static inline void set_btree_iter_dontneed(struct btree_iter *iter)
 {
        struct btree_trans *trans = iter->trans;
 
-       if (!trans->restarted)
-               btree_iter_path(trans, iter)->preserve = false;
+       if (!iter->path || trans->restarted)
+               return;
+
+       struct btree_path *path = btree_iter_path(trans, iter);
+       path->preserve          = false;
+       if (path->ref == 1)
+               path->should_be_locked  = false;
 }
 
 void *__bch2_trans_kmalloc(struct btree_trans *, size_t);
@@ -642,7 +647,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *);
 
 static inline int btree_trans_too_many_iters(struct btree_trans *trans)
 {
-       if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8)
+       if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_NORMAL_LIMIT - 8)
                return __bch2_btree_trans_too_many_iters(trans);
 
        return 0;
index 5cbcbfe85235b8de3777ae82b120d4627f99c8d7..1e8cf49a69353198774a0e5b798c2f1f135041fa 100644 (file)
@@ -130,12 +130,30 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree
        return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
 }
 
+static void journal_iter_verify(struct journal_iter *iter)
+{
+       struct journal_keys *keys = iter->keys;
+       size_t gap_size = keys->size - keys->nr;
+
+       BUG_ON(iter->idx >= keys->gap &&
+              iter->idx <  keys->gap + gap_size);
+
+       if (iter->idx < keys->size) {
+               struct journal_key *k = keys->data + iter->idx;
+
+               int cmp = cmp_int(k->btree_id,  iter->btree_id) ?:
+                         cmp_int(k->level,     iter->level);
+               BUG_ON(cmp < 0);
+       }
+}
+
 static void journal_iters_fix(struct bch_fs *c)
 {
        struct journal_keys *keys = &c->journal_keys;
        /* The key we just inserted is immediately before the gap: */
        size_t gap_end = keys->gap + (keys->size - keys->nr);
-       struct btree_and_journal_iter *iter;
+       struct journal_key *new_key = &keys->data[keys->gap - 1];
+       struct journal_iter *iter;
 
        /*
         * If an iterator points one after the key we just inserted, decrement
@@ -143,9 +161,14 @@ static void journal_iters_fix(struct bch_fs *c)
         * decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
         * handle that:
         */
-       list_for_each_entry(iter, &c->journal_iters, journal.list)
-               if (iter->journal.idx == gap_end)
-                       iter->journal.idx = keys->gap - 1;
+       list_for_each_entry(iter, &c->journal_iters, list) {
+               journal_iter_verify(iter);
+               if (iter->idx           == gap_end &&
+                   new_key->btree_id   == iter->btree_id &&
+                   new_key->level      == iter->level)
+                       iter->idx = keys->gap - 1;
+               journal_iter_verify(iter);
+       }
 }
 
 static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
@@ -192,7 +215,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
        if (idx > keys->gap)
                idx -= keys->size - keys->nr;
 
+       size_t old_gap = keys->gap;
+
        if (keys->nr == keys->size) {
+               journal_iters_move_gap(c, old_gap, keys->size);
+               old_gap = keys->size;
+
                struct journal_keys new_keys = {
                        .nr                     = keys->nr,
                        .size                   = max_t(size_t, keys->size, 8) * 2,
@@ -216,7 +244,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
                keys->gap       = keys->nr;
        }
 
-       journal_iters_move_gap(c, keys->gap, idx);
+       journal_iters_move_gap(c, old_gap, idx);
 
        move_gap(keys, idx);
 
@@ -301,16 +329,21 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
 
 static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
 {
-       struct journal_key *k = iter->keys->data + iter->idx;
+       journal_iter_verify(iter);
+
+       while (iter->idx < iter->keys->size) {
+               struct journal_key *k = iter->keys->data + iter->idx;
+
+               int cmp = cmp_int(k->btree_id,  iter->btree_id) ?:
+                         cmp_int(k->level,     iter->level);
+               if (cmp > 0)
+                       break;
+               BUG_ON(cmp);
 
-       while (k < iter->keys->data + iter->keys->size &&
-              k->btree_id      == iter->btree_id &&
-              k->level         == iter->level) {
                if (!k->overwritten)
                        return bkey_i_to_s_c(k->k);
 
                bch2_journal_iter_advance(iter);
-               k = iter->keys->data + iter->idx;
        }
 
        return bkey_s_c_null;
@@ -330,6 +363,8 @@ static void bch2_journal_iter_init(struct bch_fs *c,
        iter->level     = level;
        iter->keys      = &c->journal_keys;
        iter->idx       = bch2_journal_key_search(&c->journal_keys, id, level, pos);
+
+       journal_iter_verify(iter);
 }
 
 static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
@@ -434,10 +469,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
        iter->trans = trans;
        iter->b = b;
        iter->node_iter = node_iter;
-       bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
-       INIT_LIST_HEAD(&iter->journal.list);
        iter->pos = b->data->min_key;
        iter->at_end = false;
+       INIT_LIST_HEAD(&iter->journal.list);
+
+       if (trans->journal_replay_not_finished) {
+               bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
+               if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
+                       list_add(&iter->journal.list, &trans->c->journal_iters);
+       }
 }
 
 /*
@@ -452,9 +492,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
 
        bch2_btree_node_iter_init_from_start(&node_iter, b);
        __bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
-       if (trans->journal_replay_not_finished &&
-           !test_bit(BCH_FS_may_go_rw, &trans->c->flags))
-               list_add(&iter->journal.list, &trans->c->journal_iters);
 }
 
 /* sort and dedup all keys in the journal: */
index 581edcb0911bfa39e9ec6242686bd213c47f352c..88a3582a32757e34a28eb37143f9ff78a88a4085 100644 (file)
@@ -169,6 +169,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
        } else {
                mutex_lock(&bc->lock);
                list_move_tail(&ck->list, &bc->freed_pcpu);
+               bc->nr_freed_pcpu++;
                mutex_unlock(&bc->lock);
        }
 }
@@ -245,6 +246,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
                if (!list_empty(&bc->freed_pcpu)) {
                        ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
                        list_del_init(&ck->list);
+                       bc->nr_freed_pcpu--;
                }
                mutex_unlock(&bc->lock);
        }
@@ -659,7 +661,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                commit_flags |= BCH_WATERMARK_reclaim;
 
        if (ck->journal.seq != journal_last_seq(j) ||
-           j->watermark == BCH_WATERMARK_stripe)
+           !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags))
                commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
 
        ret   = bch2_btree_iter_traverse(&b_iter) ?:
index b9b151e693ed60ecc3dc9147cc34902643cfc7aa..f2caf491957efc2345c082323516e58fe2a35302 100644 (file)
@@ -440,33 +440,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
                                       struct btree_path *path,
                                       struct btree_bkey_cached_common *b)
 {
-       struct btree_path *linked;
-       unsigned i, iter;
-       int ret;
-
-       /*
-        * XXX BIG FAT NOTICE
-        *
-        * Drop all read locks before taking a write lock:
-        *
-        * This is a hack, because bch2_btree_node_lock_write_nofail() is a
-        * hack - but by dropping read locks first, this should never fail, and
-        * we only use this in code paths where whatever read locks we've
-        * already taken are no longer needed:
-        */
-
-       trans_for_each_path(trans, linked, iter) {
-               if (!linked->nodes_locked)
-                       continue;
-
-               for (i = 0; i < BTREE_MAX_DEPTH; i++)
-                       if (btree_node_read_locked(linked, i)) {
-                               btree_node_unlock(trans, linked, i);
-                               btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
-                       }
-       }
-
-       ret = __btree_node_lock_write(trans, path, b, true);
+       int ret = __btree_node_lock_write(trans, path, b, true);
        BUG_ON(ret);
 }
 
index 3f33be7e5e5c26d9be6d0f1431ee04c3e545b825..866bd278439f8bb72a0b1e31e672953ff9b3f839 100644 (file)
@@ -133,6 +133,19 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
        if (le64_to_cpu(bn->magic) != bset_magic(c))
                return;
 
+       if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(&bn->keys))) {
+               struct nonce nonce = btree_nonce(&bn->keys, 0);
+               unsigned bytes = (void *) &bn->keys - (void *) &bn->flags;
+
+               bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
+       }
+
+       if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
+               return;
+
+       if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
+               return;
+
        rcu_read_lock();
        struct found_btree_node n = {
                .btree_id       = BTREE_NODE_ID(bn),
@@ -192,8 +205,13 @@ static int read_btree_nodes_worker(void *p)
                                last_print = jiffies;
                        }
 
-                       try_read_btree_node(w->f, ca, bio, buf,
-                                           bucket * ca->mi.bucket_size + bucket_offset);
+                       u64 sector = bucket * ca->mi.bucket_size + bucket_offset;
+
+                       if (c->sb.version_upgrade_complete >= bcachefs_metadata_version_mi_btree_bitmap &&
+                           !bch2_dev_btree_bitmap_marked_sectors(ca, sector, btree_sectors(c)))
+                               continue;
+
+                       try_read_btree_node(w->f, ca, bio, buf, sector);
                }
 err:
        bio_put(bio);
@@ -213,6 +231,9 @@ static int read_btree_nodes(struct find_btree_nodes *f)
        closure_init_stack(&cl);
 
        for_each_online_member(c, ca) {
+               if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree)))
+                       continue;
+
                struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
                struct task_struct *t;
 
@@ -290,7 +311,7 @@ again:
                        found_btree_node_to_text(&buf, c, n);
                        bch_err(c, "%s", buf.buf);
                        printbuf_exit(&buf);
-                       return -1;
+                       return -BCH_ERR_fsck_repair_unimplemented;
                }
        }
 
@@ -436,6 +457,9 @@ bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
 int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
                           unsigned level, struct bpos node_min, struct bpos node_max)
 {
+       if (btree_id_is_alloc(btree))
+               return 0;
+
        struct find_btree_nodes *f = &c->found_btree_nodes;
 
        int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
index aa9da49707404015a558c9c6e9339b733d0c98c3..bbec91e8e6506fa32611b340dc1a3a4a104aeed6 100644 (file)
@@ -397,12 +397,13 @@ static int btree_key_can_insert_cached(struct btree_trans *trans, unsigned flags
        struct bkey_cached *ck = (void *) path->l[0].b;
        unsigned new_u64s;
        struct bkey_i *new_k;
+       unsigned watermark = flags & BCH_WATERMARK_MASK;
 
        EBUG_ON(path->level);
 
-       if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
-           bch2_btree_key_cache_must_wait(c) &&
-           !(flags & BCH_TRANS_COMMIT_journal_reclaim))
+       if (watermark < BCH_WATERMARK_reclaim &&
+           !test_bit(BKEY_CACHED_DIRTY, &ck->flags) &&
+           bch2_btree_key_cache_must_wait(c))
                return -BCH_ERR_btree_insert_need_journal_reclaim;
 
        /*
@@ -499,9 +500,8 @@ static int run_one_trans_trigger(struct btree_trans *trans, struct btree_insert_
 }
 
 static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
-                             struct btree_insert_entry *btree_id_start)
+                             unsigned btree_id_start)
 {
-       struct btree_insert_entry *i;
        bool trans_trigger_run;
        int ret, overwrite;
 
@@ -514,13 +514,13 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
                do {
                        trans_trigger_run = false;
 
-                       for (i = btree_id_start;
-                            i < trans->updates + trans->nr_updates && i->btree_id <= btree_id;
+                       for (unsigned i = btree_id_start;
+                            i < trans->nr_updates && trans->updates[i].btree_id <= btree_id;
                             i++) {
-                               if (i->btree_id != btree_id)
+                               if (trans->updates[i].btree_id != btree_id)
                                        continue;
 
-                               ret = run_one_trans_trigger(trans, i, overwrite);
+                               ret = run_one_trans_trigger(trans, trans->updates + i, overwrite);
                                if (ret < 0)
                                        return ret;
                                if (ret)
@@ -534,8 +534,7 @@ static int run_btree_triggers(struct btree_trans *trans, enum btree_id btree_id,
 
 static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
 {
-       struct btree_insert_entry *btree_id_start = trans->updates;
-       unsigned btree_id = 0;
+       unsigned btree_id = 0, btree_id_start = 0;
        int ret = 0;
 
        /*
@@ -549,8 +548,8 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
                if (btree_id == BTREE_ID_alloc)
                        continue;
 
-               while (btree_id_start < trans->updates + trans->nr_updates &&
-                      btree_id_start->btree_id < btree_id)
+               while (btree_id_start < trans->nr_updates &&
+                      trans->updates[btree_id_start].btree_id < btree_id)
                        btree_id_start++;
 
                ret = run_btree_triggers(trans, btree_id, btree_id_start);
@@ -558,11 +557,13 @@ static int bch2_trans_commit_run_triggers(struct btree_trans *trans)
                        return ret;
        }
 
-       trans_for_each_update(trans, i) {
+       for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+               struct btree_insert_entry *i = trans->updates + idx;
+
                if (i->btree_id > BTREE_ID_alloc)
                        break;
                if (i->btree_id == BTREE_ID_alloc) {
-                       ret = run_btree_triggers(trans, BTREE_ID_alloc, i);
+                       ret = run_btree_triggers(trans, BTREE_ID_alloc, idx);
                        if (ret)
                                return ret;
                        break;
@@ -826,7 +827,8 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags
        struct bch_fs *c = trans->c;
        int ret = 0, u64s_delta = 0;
 
-       trans_for_each_update(trans, i) {
+       for (unsigned idx = 0; idx < trans->nr_updates; idx++) {
+               struct btree_insert_entry *i = trans->updates + idx;
                if (i->cached)
                        continue;
 
index 9404d96c38f3b368726a6603b601b241b5106100..e0c982a4195c764ab8a415b5f7f80cbff88c1935 100644 (file)
@@ -364,7 +364,21 @@ struct btree_insert_entry {
        unsigned long           ip_allocated;
 };
 
+/* Number of btree paths we preallocate, usually enough */
 #define BTREE_ITER_INITIAL             64
+/*
+ * Limit for btree_trans_too_many_iters(); this is enough that almost all code
+ * paths should run inside this limit, and if they don't it usually indicates a
+ * bug (leaking/duplicated btree paths).
+ *
+ * exception: some fsck paths
+ *
+ * bugs with excessive path usage seem to have possibly been eliminated now, so
+ * we might consider eliminating this (and btree_trans_too_many_iters()) at some
+ * point.
+ */
+#define BTREE_ITER_NORMAL_LIMIT                256
+/* never exceed limit */
 #define BTREE_ITER_MAX                 (1U << 10)
 
 struct btree_trans_commit_hook;
index 32397b99752fd2ec3cfd553724c97c7f217ca56e..6030c396754f6f494c3c137abd313f6bf80c2ffb 100644 (file)
 #include "keylist.h"
 #include "recovery_passes.h"
 #include "replicas.h"
+#include "sb-members.h"
 #include "super-io.h"
 #include "trace.h"
 
 #include <linux/random.h>
 
-const char * const bch2_btree_update_modes[] = {
+static const char * const bch2_btree_update_modes[] = {
 #define x(t) #t,
-       BCH_WATERMARKS()
+       BTREE_UPDATE_MODES()
 #undef x
        NULL
 };
@@ -605,6 +606,26 @@ static void btree_update_add_key(struct btree_update *as,
        bch2_keylist_push(keys);
 }
 
+static bool btree_update_new_nodes_marked_sb(struct btree_update *as)
+{
+       for_each_keylist_key(&as->new_keys, k)
+               if (!bch2_dev_btree_bitmap_marked(as->c, bkey_i_to_s_c(k)))
+                       return false;
+       return true;
+}
+
+static void btree_update_new_nodes_mark_sb(struct btree_update *as)
+{
+       struct bch_fs *c = as->c;
+
+       mutex_lock(&c->sb_lock);
+       for_each_keylist_key(&as->new_keys, k)
+               bch2_dev_btree_bitmap_mark(c, bkey_i_to_s_c(k));
+
+       bch2_write_super(c);
+       mutex_unlock(&c->sb_lock);
+}
+
 /*
  * The transactional part of an interior btree node update, where we journal the
  * update we did to the interior node and update alloc info:
@@ -662,6 +683,9 @@ static void btree_update_nodes_written(struct btree_update *as)
        if (ret)
                goto err;
 
+       if (!btree_update_new_nodes_marked_sb(as))
+               btree_update_new_nodes_mark_sb(as);
+
        /*
         * Wait for any in flight writes to finish before we free the old nodes
         * on disk:
@@ -704,9 +728,13 @@ static void btree_update_nodes_written(struct btree_update *as)
        bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
                             "%s", bch2_err_str(ret));
 err:
-       if (as->b) {
-
-               b = as->b;
+       /*
+        * We have to be careful because another thread might be getting ready
+        * to free as->b and calling btree_update_reparent() on us - we'll
+        * recheck under btree_update_lock below:
+        */
+       b = READ_ONCE(as->b);
+       if (b) {
                btree_path_idx_t path_idx = get_unlocked_mut_path(trans,
                                                as->btree_id, b->c.level, b->key.k.p);
                struct btree_path *path = trans->paths + path_idx;
@@ -850,15 +878,17 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
 {
        struct bch_fs *c = as->c;
 
-       mutex_lock(&c->btree_interior_update_lock);
-       list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
-
        BUG_ON(as->mode != BTREE_UPDATE_none);
+       BUG_ON(as->update_level_end < b->c.level);
        BUG_ON(!btree_node_dirty(b));
        BUG_ON(!b->c.level);
 
+       mutex_lock(&c->btree_interior_update_lock);
+       list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+
        as->mode        = BTREE_UPDATE_node;
        as->b           = b;
+       as->update_level_end = b->c.level;
 
        set_btree_node_write_blocked(b);
        list_add(&as->write_blocked_list, &b->write_blocked);
@@ -1100,7 +1130,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
 
 static struct btree_update *
 bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
-                       unsigned level, bool split, unsigned flags)
+                       unsigned level_start, bool split, unsigned flags)
 {
        struct bch_fs *c = trans->c;
        struct btree_update *as;
@@ -1108,7 +1138,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        int disk_res_flags = (flags & BCH_TRANS_COMMIT_no_enospc)
                ? BCH_DISK_RESERVATION_NOFAIL : 0;
        unsigned nr_nodes[2] = { 0, 0 };
-       unsigned update_level = level;
+       unsigned level_end = level_start;
        enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
        int ret = 0;
        u32 restart_count = trans->restart_count;
@@ -1123,34 +1153,30 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        flags &= ~BCH_WATERMARK_MASK;
        flags |= watermark;
 
-       if (watermark < c->journal.watermark) {
-               struct journal_res res = { 0 };
-               unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
+       if (watermark < BCH_WATERMARK_reclaim &&
+           test_bit(JOURNAL_SPACE_LOW, &c->journal.flags)) {
+               if (flags & BCH_TRANS_COMMIT_journal_reclaim)
+                       return ERR_PTR(-BCH_ERR_journal_reclaim_would_deadlock);
 
-               if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-                   watermark < BCH_WATERMARK_reclaim)
-                       journal_flags |= JOURNAL_RES_GET_NONBLOCK;
-
-               ret = drop_locks_do(trans,
-                       bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
-               if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
-                       ret = -BCH_ERR_journal_reclaim_would_deadlock;
+               bch2_trans_unlock(trans);
+               wait_event(c->journal.wait, !test_bit(JOURNAL_SPACE_LOW, &c->journal.flags));
+               ret = bch2_trans_relock(trans);
                if (ret)
                        return ERR_PTR(ret);
        }
 
        while (1) {
-               nr_nodes[!!update_level] += 1 + split;
-               update_level++;
+               nr_nodes[!!level_end] += 1 + split;
+               level_end++;
 
-               ret = bch2_btree_path_upgrade(trans, path, update_level + 1);
+               ret = bch2_btree_path_upgrade(trans, path, level_end + 1);
                if (ret)
                        return ERR_PTR(ret);
 
-               if (!btree_path_node(path, update_level)) {
+               if (!btree_path_node(path, level_end)) {
                        /* Allocating new root? */
                        nr_nodes[1] += split;
-                       update_level = BTREE_MAX_DEPTH;
+                       level_end = BTREE_MAX_DEPTH;
                        break;
                }
 
@@ -1158,11 +1184,11 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                 * Always check for space for two keys, even if we won't have to
                 * split at prior level - it might have been a merge instead:
                 */
-               if (bch2_btree_node_insert_fits(path->l[update_level].b,
+               if (bch2_btree_node_insert_fits(path->l[level_end].b,
                                                BKEY_BTREE_PTR_U64s_MAX * 2))
                        break;
 
-               split = path->l[update_level].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
+               split = path->l[level_end].b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c);
        }
 
        if (!down_read_trylock(&c->gc_lock)) {
@@ -1176,14 +1202,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        as = mempool_alloc(&c->btree_interior_update_pool, GFP_NOFS);
        memset(as, 0, sizeof(*as));
        closure_init(&as->cl, NULL);
-       as->c           = c;
-       as->start_time  = start_time;
-       as->ip_started  = _RET_IP_;
-       as->mode        = BTREE_UPDATE_none;
-       as->watermark   = watermark;
-       as->took_gc_lock = true;
-       as->btree_id    = path->btree_id;
-       as->update_level = update_level;
+       as->c                   = c;
+       as->start_time          = start_time;
+       as->ip_started          = _RET_IP_;
+       as->mode                = BTREE_UPDATE_none;
+       as->watermark           = watermark;
+       as->took_gc_lock        = true;
+       as->btree_id            = path->btree_id;
+       as->update_level_start  = level_start;
+       as->update_level_end    = level_end;
        INIT_LIST_HEAD(&as->list);
        INIT_LIST_HEAD(&as->unwritten_list);
        INIT_LIST_HEAD(&as->write_blocked_list);
@@ -1277,23 +1304,29 @@ static void bch2_btree_set_root_inmem(struct bch_fs *c, struct btree *b)
        bch2_recalc_btree_reserve(c);
 }
 
-static void bch2_btree_set_root(struct btree_update *as,
-                               struct btree_trans *trans,
-                               struct btree_path *path,
-                               struct btree *b)
+static int bch2_btree_set_root(struct btree_update *as,
+                              struct btree_trans *trans,
+                              struct btree_path *path,
+                              struct btree *b,
+                              bool nofail)
 {
        struct bch_fs *c = as->c;
-       struct btree *old;
 
        trace_and_count(c, btree_node_set_root, trans, b);
 
-       old = btree_node_root(c, b);
+       struct btree *old = btree_node_root(c, b);
 
        /*
         * Ensure no one is using the old root while we switch to the
         * new root:
         */
-       bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+       if (nofail) {
+               bch2_btree_node_lock_write_nofail(trans, path, &old->c);
+       } else {
+               int ret = bch2_btree_node_lock_write(trans, path, &old->c);
+               if (ret)
+                       return ret;
+       }
 
        bch2_btree_set_root_inmem(c, b);
 
@@ -1307,6 +1340,7 @@ static void bch2_btree_set_root(struct btree_update *as,
         * depend on the new root would have to update the new root.
         */
        bch2_btree_node_unlock_write(trans, path, old);
+       return 0;
 }
 
 /* Interior node updates: */
@@ -1373,12 +1407,12 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as,
 }
 
 static void
-__bch2_btree_insert_keys_interior(struct btree_update *as,
-                                 struct btree_trans *trans,
-                                 struct btree_path *path,
-                                 struct btree *b,
-                                 struct btree_node_iter node_iter,
-                                 struct keylist *keys)
+bch2_btree_insert_keys_interior(struct btree_update *as,
+                               struct btree_trans *trans,
+                               struct btree_path *path,
+                               struct btree *b,
+                               struct btree_node_iter node_iter,
+                               struct keylist *keys)
 {
        struct bkey_i *insert = bch2_keylist_front(keys);
        struct bkey_packed *k;
@@ -1534,7 +1568,7 @@ static void btree_split_insert_keys(struct btree_update *as,
 
                bch2_btree_node_iter_init(&node_iter, b, &bch2_keylist_front(keys)->k.p);
 
-               __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
+               bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
 
                BUG_ON(bch2_btree_node_check_topology(trans, b));
        }
@@ -1649,15 +1683,16 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
        if (parent) {
                /* Split a non root node */
                ret = bch2_btree_insert_node(as, trans, path, parent, &as->parent_keys);
-               if (ret)
-                       goto err;
        } else if (n3) {
-               bch2_btree_set_root(as, trans, trans->paths + path, n3);
+               ret = bch2_btree_set_root(as, trans, trans->paths + path, n3, false);
        } else {
                /* Root filled up but didn't need to be split */
-               bch2_btree_set_root(as, trans, trans->paths + path, n1);
+               ret = bch2_btree_set_root(as, trans, trans->paths + path, n1, false);
        }
 
+       if (ret)
+               goto err;
+
        if (n3) {
                bch2_btree_update_get_open_buckets(as, n3);
                bch2_btree_node_write(c, n3, SIX_LOCK_intent, 0);
@@ -1714,27 +1749,6 @@ err:
        goto out;
 }
 
-static void
-bch2_btree_insert_keys_interior(struct btree_update *as,
-                               struct btree_trans *trans,
-                               struct btree_path *path,
-                               struct btree *b,
-                               struct keylist *keys)
-{
-       struct btree_path *linked;
-       unsigned i;
-
-       __bch2_btree_insert_keys_interior(as, trans, path, b,
-                                         path->l[b->c.level].iter, keys);
-
-       btree_update_updated_node(as, b);
-
-       trans_for_each_path_with_node(trans, b, linked, i)
-               bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
-
-       bch2_trans_verify_paths(trans);
-}
-
 /**
  * bch2_btree_insert_node - insert bkeys into a given btree node
  *
@@ -1755,7 +1769,8 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
                                  struct keylist *keys)
 {
        struct bch_fs *c = as->c;
-       struct btree_path *path = trans->paths + path_idx;
+       struct btree_path *path = trans->paths + path_idx, *linked;
+       unsigned i;
        int old_u64s = le16_to_cpu(btree_bset_last(b)->u64s);
        int old_live_u64s = b->nr.live_u64s;
        int live_u64s_added, u64s_added;
@@ -1784,7 +1799,13 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
                return ret;
        }
 
-       bch2_btree_insert_keys_interior(as, trans, path, b, keys);
+       bch2_btree_insert_keys_interior(as, trans, path, b,
+                                       path->l[b->c.level].iter, keys);
+
+       trans_for_each_path_with_node(trans, b, linked, i)
+               bch2_btree_node_iter_peek(&linked->l[b->c.level].iter, b);
+
+       bch2_trans_verify_paths(trans);
 
        live_u64s_added = (int) b->nr.live_u64s - old_live_u64s;
        u64s_added = (int) le16_to_cpu(btree_bset_last(b)->u64s) - old_u64s;
@@ -1798,6 +1819,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
            bch2_maybe_compact_whiteouts(c, b))
                bch2_trans_node_reinit_iter(trans, b);
 
+       btree_update_updated_node(as, b);
        bch2_btree_node_unlock_write(trans, path, b);
 
        BUG_ON(bch2_btree_node_check_topology(trans, b));
@@ -1807,7 +1829,7 @@ split:
         * We could attempt to avoid the transaction restart, by calling
         * bch2_btree_path_upgrade() and allocating more nodes:
         */
-       if (b->c.level >= as->update_level) {
+       if (b->c.level >= as->update_level_end) {
                trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
                return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
        }
@@ -1873,7 +1895,9 @@ static void __btree_increase_depth(struct btree_update *as, struct btree_trans *
        bch2_keylist_add(&as->parent_keys, &b->key);
        btree_split_insert_keys(as, trans, path_idx, n, &as->parent_keys);
 
-       bch2_btree_set_root(as, trans, path, n);
+       int ret = bch2_btree_set_root(as, trans, path, n, true);
+       BUG_ON(ret);
+
        bch2_btree_update_get_open_buckets(as, n);
        bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
        bch2_trans_node_add(trans, path, n);
@@ -1926,6 +1950,18 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
        BUG_ON(!trans->paths[path].should_be_locked);
        BUG_ON(!btree_node_locked(&trans->paths[path], level));
 
+       /*
+        * Work around a deadlock caused by the btree write buffer not doing
+        * merges and leaving tons of merges for us to do - we really don't need
+        * to be doing merges at all from the interior update path, and if the
+        * interior update path is generating too many new interior updates we
+        * deadlock:
+        */
+       if ((flags & BCH_WATERMARK_MASK) == BCH_WATERMARK_interior_updates)
+               return 0;
+
+       flags &= ~BCH_WATERMARK_MASK;
+
        b = trans->paths[path].l[level].b;
 
        if ((sib == btree_prev_sib && bpos_eq(b->data->min_key, POS_MIN)) ||
@@ -2071,6 +2107,10 @@ err:
                bch2_path_put(trans, new_path, true);
        bch2_path_put(trans, sib_path, true);
        bch2_trans_verify_locks(trans);
+       if (ret == -BCH_ERR_journal_reclaim_would_deadlock)
+               ret = 0;
+       if (!ret)
+               ret = bch2_trans_relock(trans);
        return ret;
 err_free_update:
        bch2_btree_node_free_never_used(as, trans, n);
@@ -2116,12 +2156,13 @@ int bch2_btree_node_rewrite(struct btree_trans *trans,
        if (parent) {
                bch2_keylist_add(&as->parent_keys, &n->key);
                ret = bch2_btree_insert_node(as, trans, iter->path, parent, &as->parent_keys);
-               if (ret)
-                       goto err;
        } else {
-               bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n);
+               ret = bch2_btree_set_root(as, trans, btree_iter_path(trans, iter), n, false);
        }
 
+       if (ret)
+               goto err;
+
        bch2_btree_update_get_open_buckets(as, n);
        bch2_btree_node_write(c, n, SIX_LOCK_intent, 0);
 
@@ -2519,9 +2560,11 @@ void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned lev
 
 static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
 {
-       prt_printf(out, "%ps: btree=%s watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+       prt_printf(out, "%ps: btree=%s l=%u-%u watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
                   (void *) as->ip_started,
                   bch2_btree_id_str(as->btree_id),
+                  as->update_level_start,
+                  as->update_level_end,
                   bch2_watermarks[as->watermark],
                   bch2_btree_update_modes[as->mode],
                   as->nodes_written,
index 88dcf5a22a3bd628aaa22065f3cdd70ca3770d90..c1a479ebaad12120813f95a4af50b32cd542023d 100644 (file)
@@ -57,7 +57,8 @@ struct btree_update {
        unsigned                        took_gc_lock:1;
 
        enum btree_id                   btree_id;
-       unsigned                        update_level;
+       unsigned                        update_level_start;
+       unsigned                        update_level_end;
 
        struct disk_reservation         disk_res;
 
index baf63e2fddb64cd8f4c745d0cc80c864c86ffaa6..36a6f42aba5e6fc5a36418c1d7565e07e8f90420 100644 (file)
@@ -316,6 +316,16 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
                            bpos_gt(k->k.k.p, path->l[0].b->key.k.p)) {
                                bch2_btree_node_unlock_write(trans, path, path->l[0].b);
                                write_locked = false;
+
+                               ret = lockrestart_do(trans,
+                                       bch2_btree_iter_traverse(&iter) ?:
+                                       bch2_foreground_maybe_merge(trans, iter.path, 0,
+                                                       BCH_WATERMARK_reclaim|
+                                                       BCH_TRANS_COMMIT_journal_reclaim|
+                                                       BCH_TRANS_COMMIT_no_check_rw|
+                                                       BCH_TRANS_COMMIT_no_enospc));
+                               if (ret)
+                                       goto err;
                        }
                }
 
@@ -382,10 +392,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 
                        ret = commit_do(trans, NULL, NULL,
                                        BCH_WATERMARK_reclaim|
+                                       BCH_TRANS_COMMIT_journal_reclaim|
                                        BCH_TRANS_COMMIT_no_check_rw|
                                        BCH_TRANS_COMMIT_no_enospc|
-                                       BCH_TRANS_COMMIT_no_journal_res|
-                                       BCH_TRANS_COMMIT_journal_reclaim,
+                                       BCH_TRANS_COMMIT_no_journal_res ,
                                        btree_write_buffered_insert(trans, i));
                        if (ret)
                                goto err;
index 00aaf4bb513974a6b9c0353ea9445f92671c32eb..f9af5adabe83638eea7ffd15ea2f730085f81cc1 100644 (file)
@@ -395,14 +395,6 @@ static inline const char *bch2_data_type_str(enum bch_data_type type)
                : "(invalid data type)";
 }
 
-static inline void bch2_prt_data_type(struct printbuf *out, enum bch_data_type type)
-{
-       if (type < BCH_DATA_NR)
-               prt_str(out, __bch2_data_types[type]);
-       else
-               prt_printf(out, "(invalid data type %u)", type);
-}
-
 /* disk reservations: */
 
 static inline void bch2_disk_reservation_put(struct bch_fs *c,
index cbfa6459bdbceec6a953f91a385fc5e4fe76691d..72781aad6ba70ccc774b688c6a9d50b2dc21f133 100644 (file)
@@ -134,42 +134,38 @@ static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg
 struct fsck_thread {
        struct thread_with_stdio thr;
        struct bch_fs           *c;
-       char                    **devs;
-       size_t                  nr_devs;
        struct bch_opts         opts;
 };
 
 static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
 {
        struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
-       if (thr->devs)
-               for (size_t i = 0; i < thr->nr_devs; i++)
-                       kfree(thr->devs[i]);
-       kfree(thr->devs);
        kfree(thr);
 }
 
 static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
 {
        struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
-       struct bch_fs *c = bch2_fs_open(thr->devs, thr->nr_devs, thr->opts);
-
-       if (IS_ERR(c))
-               return PTR_ERR(c);
+       struct bch_fs *c = thr->c;
 
-       int ret = 0;
-       if (test_bit(BCH_FS_errors_fixed, &c->flags))
-               ret |= 1;
-       if (test_bit(BCH_FS_error, &c->flags))
-               ret |= 4;
+       int ret = PTR_ERR_OR_ZERO(c);
+       if (ret)
+               return ret;
 
-       bch2_fs_stop(c);
+       ret = bch2_fs_start(thr->c);
+       if (ret)
+               goto err;
 
-       if (ret & 1)
+       if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
                bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
-       if (ret & 4)
+               ret |= 1;
+       }
+       if (test_bit(BCH_FS_error, &c->flags)) {
                bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
-
+               ret |= 4;
+       }
+err:
+       bch2_fs_stop(c);
        return ret;
 }
 
@@ -182,7 +178,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
 {
        struct bch_ioctl_fsck_offline arg;
        struct fsck_thread *thr = NULL;
-       u64 *devs = NULL;
+       darray_str(devs) = {};
        long ret = 0;
 
        if (copy_from_user(&arg, user_arg, sizeof(arg)))
@@ -194,29 +190,32 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
-       if (!(devs = kcalloc(arg.nr_devs, sizeof(*devs), GFP_KERNEL)) ||
-           !(thr = kzalloc(sizeof(*thr), GFP_KERNEL)) ||
-           !(thr->devs = kcalloc(arg.nr_devs, sizeof(*thr->devs), GFP_KERNEL))) {
-               ret = -ENOMEM;
-               goto err;
-       }
+       for (size_t i = 0; i < arg.nr_devs; i++) {
+               u64 dev_u64;
+               ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64));
+               if (ret)
+                       goto err;
 
-       thr->opts = bch2_opts_empty();
-       thr->nr_devs = arg.nr_devs;
+               char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX);
+               ret = PTR_ERR_OR_ZERO(dev_str);
+               if (ret)
+                       goto err;
 
-       if (copy_from_user(devs, &user_arg->devs[0],
-                          array_size(sizeof(user_arg->devs[0]), arg.nr_devs))) {
-               ret = -EINVAL;
-               goto err;
+               ret = darray_push(&devs, dev_str);
+               if (ret) {
+                       kfree(dev_str);
+                       goto err;
+               }
        }
 
-       for (size_t i = 0; i < arg.nr_devs; i++) {
-               thr->devs[i] = strndup_user((char __user *)(unsigned long) devs[i], PATH_MAX);
-               ret = PTR_ERR_OR_ZERO(thr->devs[i]);
-               if (ret)
-                       goto err;
+       thr = kzalloc(sizeof(*thr), GFP_KERNEL);
+       if (!thr) {
+               ret = -ENOMEM;
+               goto err;
        }
 
+       thr->opts = bch2_opts_empty();
+
        if (arg.opts) {
                char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
 
@@ -230,15 +229,26 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
 
        opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
 
+       /* We need request_key() to be called before we punt to kthread: */
+       opt_set(thr->opts, nostart, true);
+
+       thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
+
+       if (!IS_ERR(thr->c) &&
+           thr->c->opts.errors == BCH_ON_ERROR_panic)
+               thr->c->opts.errors = BCH_ON_ERROR_ro;
+
        ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_offline_fsck_ops);
-err:
-       if (ret < 0) {
-               if (thr)
-                       bch2_fsck_thread_exit(&thr->thr);
-               pr_err("ret %s", bch2_err_str(ret));
-       }
-       kfree(devs);
+out:
+       darray_for_each(devs, i)
+               kfree(*i);
+       darray_exit(&devs);
        return ret;
+err:
+       if (thr)
+               bch2_fsck_thread_exit(&thr->thr);
+       pr_err("ret %s", bch2_err_str(ret));
+       goto out;
 }
 
 static long bch2_global_ioctl(unsigned cmd, void __user *arg)
index 4701457f6381ca820e17a12707009c272ed5b4ac..7ed779b411f61e4e3f05a703ce9e091474237939 100644 (file)
@@ -429,15 +429,20 @@ int bch2_rechecksum_bio(struct bch_fs *c, struct bio *bio,
                                extent_nonce(version, crc_old), bio);
 
        if (bch2_crc_cmp(merged, crc_old.csum) && !c->opts.no_data_io) {
-               bch_err(c, "checksum error in %s() (memory corruption or bug?)\n"
-                       "expected %0llx:%0llx got %0llx:%0llx (old type %s new type %s)",
-                       __func__,
-                       crc_old.csum.hi,
-                       crc_old.csum.lo,
-                       merged.hi,
-                       merged.lo,
-                       bch2_csum_types[crc_old.csum_type],
-                       bch2_csum_types[new_csum_type]);
+               struct printbuf buf = PRINTBUF;
+               prt_printf(&buf, "checksum error in %s() (memory corruption or bug?)\n"
+                          "expected %0llx:%0llx got %0llx:%0llx (old type ",
+                          __func__,
+                          crc_old.csum.hi,
+                          crc_old.csum.lo,
+                          merged.hi,
+                          merged.lo);
+               bch2_prt_csum_type(&buf, crc_old.csum_type);
+               prt_str(&buf, " new type ");
+               bch2_prt_csum_type(&buf, new_csum_type);
+               prt_str(&buf, ")");
+               bch_err(c, "%s", buf.buf);
+               printbuf_exit(&buf);
                return -EIO;
        }
 
index 1b8c2c1016dc6347ce12ef3161d4723835dfa56e..e40499fde9a4019fc75d62f825e9e5583caf803b 100644 (file)
@@ -61,11 +61,12 @@ static inline void bch2_csum_err_msg(struct printbuf *out,
                                     struct bch_csum expected,
                                     struct bch_csum got)
 {
-       prt_printf(out, "checksum error: got ");
+       prt_str(out, "checksum error, type ");
+       bch2_prt_csum_type(out, type);
+       prt_str(out, ": got ");
        bch2_csum_to_text(out, type, got);
        prt_str(out, " should be ");
        bch2_csum_to_text(out, type, expected);
-       prt_printf(out, " type %s", bch2_csum_types[type]);
 }
 
 int bch2_chacha_encrypt_key(struct bch_key *, struct nonce, void *, size_t);
index 58c2eb45570ff022764720f9beb10ecfa2926367..607fd5e232c902dbb39f3dac84ea2e214e6b106c 100644 (file)
@@ -47,14 +47,6 @@ static inline enum bch_compression_type bch2_compression_opt_to_type(unsigned v)
        return __bch2_compression_opt_to_type[bch2_compression_decode(v).type];
 }
 
-static inline void bch2_prt_compression_type(struct printbuf *out, enum bch_compression_type type)
-{
-       if (type < BCH_COMPRESSION_TYPE_NR)
-               prt_str(out, __bch2_compression_types[type]);
-       else
-               prt_printf(out, "(invalid compression type %u)", type);
-}
-
 int bch2_bio_uncompress_inplace(struct bch_fs *, struct bio *,
                                struct bch_extent_crc_unpacked *);
 int bch2_bio_uncompress(struct bch_fs *, struct bio *, struct bio *,
index 34731ee0217f62f6e43fb691e76083c46026b127..0022b51ce3c09cc9eafaab2f0639c944078d8c54 100644 (file)
@@ -598,6 +598,8 @@ int bch2_data_update_init(struct btree_trans *trans,
                i++;
        }
 
+       unsigned durability_required = max(0, (int) (io_opts.data_replicas - durability_have));
+
        /*
         * If current extent durability is less than io_opts.data_replicas,
         * we're not trying to rereplicate the extent up to data_replicas here -
@@ -607,7 +609,7 @@ int bch2_data_update_init(struct btree_trans *trans,
         * rereplicate, currently, so that users don't get an unexpected -ENOSPC
         */
        if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) &&
-           durability_have >= io_opts.data_replicas) {
+           !durability_required) {
                m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
                m->data_opts.rewrite_ptrs = 0;
                /* if iter == NULL, it's just a promote */
@@ -616,11 +618,18 @@ int bch2_data_update_init(struct btree_trans *trans,
                goto done;
        }
 
-       m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+       m->op.nr_replicas = min(durability_removing, durability_required) +
                m->data_opts.extra_replicas;
-       m->op.nr_replicas_required = m->op.nr_replicas;
 
-       BUG_ON(!m->op.nr_replicas);
+       /*
+        * If device(s) were set to durability=0 after data was written to them
+        * we can end up with a durability=0 extent, and the normal algorithm
+        * that tries not to increase durability doesn't work:
+        */
+       if (!(durability_have + durability_removing))
+               m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1);
+
+       m->op.nr_replicas_required = m->op.nr_replicas;
 
        if (reserve_sectors) {
                ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
index 208ce6f0fc4317d561582bae51785da2c016a1cd..cd99b739941447f4c54037c8dc87bffd5f5e0d25 100644 (file)
@@ -13,6 +13,7 @@
 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "btree_update.h"
+#include "btree_update_interior.h"
 #include "buckets.h"
 #include "debug.h"
 #include "error.h"
@@ -668,7 +669,7 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
        i->size = size;
        i->ret  = 0;
 
-       do {
+       while (1) {
                err = flush_buf(i);
                if (err)
                        return err;
@@ -676,9 +677,12 @@ static ssize_t bch2_journal_pins_read(struct file *file, char __user *buf,
                if (!i->size)
                        break;
 
+               if (done)
+                       break;
+
                done = bch2_journal_seq_pins_to_text(&i->buf, &c->journal, &i->iter);
                i->iter++;
-       } while (!done);
+       }
 
        if (i->buf.allocation_failure)
                return -ENOMEM;
@@ -693,13 +697,45 @@ static const struct file_operations journal_pins_ops = {
        .read           = bch2_journal_pins_read,
 };
 
+static ssize_t bch2_btree_updates_read(struct file *file, char __user *buf,
+                                      size_t size, loff_t *ppos)
+{
+       struct dump_iter *i = file->private_data;
+       struct bch_fs *c = i->c;
+       int err;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       if (!i->iter) {
+               bch2_btree_updates_to_text(&i->buf, c);
+               i->iter++;
+       }
+
+       err = flush_buf(i);
+       if (err)
+               return err;
+
+       if (i->buf.allocation_failure)
+               return -ENOMEM;
+
+       return i->ret;
+}
+
+static const struct file_operations btree_updates_ops = {
+       .owner          = THIS_MODULE,
+       .open           = bch2_dump_open,
+       .release        = bch2_dump_release,
+       .read           = bch2_btree_updates_read,
+};
+
 static int btree_transaction_stats_open(struct inode *inode, struct file *file)
 {
        struct bch_fs *c = inode->i_private;
        struct dump_iter *i;
 
        i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
-
        if (!i)
                return -ENOMEM;
 
@@ -866,6 +902,20 @@ void bch2_fs_debug_exit(struct bch_fs *c)
                debugfs_remove_recursive(c->fs_debug_dir);
 }
 
+static void bch2_fs_debug_btree_init(struct bch_fs *c, struct btree_debug *bd)
+{
+       struct dentry *d;
+
+       d = debugfs_create_dir(bch2_btree_id_str(bd->id), c->btree_debug_dir);
+
+       debugfs_create_file("keys", 0400, d, bd, &btree_debug_ops);
+
+       debugfs_create_file("formats", 0400, d, bd, &btree_format_debug_ops);
+
+       debugfs_create_file("bfloat-failed", 0400, d, bd,
+                           &bfloat_failed_debug_ops);
+}
+
 void bch2_fs_debug_init(struct bch_fs *c)
 {
        struct btree_debug *bd;
@@ -888,6 +938,9 @@ void bch2_fs_debug_init(struct bch_fs *c)
        debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
                            c->btree_debug, &journal_pins_ops);
 
+       debugfs_create_file("btree_updates", 0400, c->fs_debug_dir,
+                           c->btree_debug, &btree_updates_ops);
+
        debugfs_create_file("btree_transaction_stats", 0400, c->fs_debug_dir,
                            c, &btree_transaction_stats_op);
 
@@ -902,21 +955,7 @@ void bch2_fs_debug_init(struct bch_fs *c)
             bd < c->btree_debug + ARRAY_SIZE(c->btree_debug);
             bd++) {
                bd->id = bd - c->btree_debug;
-               debugfs_create_file(bch2_btree_id_str(bd->id),
-                                   0400, c->btree_debug_dir, bd,
-                                   &btree_debug_ops);
-
-               snprintf(name, sizeof(name), "%s-formats",
-                        bch2_btree_id_str(bd->id));
-
-               debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
-                                   &btree_format_debug_ops);
-
-               snprintf(name, sizeof(name), "%s-bfloat-failed",
-                        bch2_btree_id_str(bd->id));
-
-               debugfs_create_file(name, 0400, c->btree_debug_dir, bd,
-                                   &bfloat_failed_debug_ops);
+               bch2_fs_debug_btree_init(c, bd);
        }
 }
 
index 082075244e16aedc824249b239ecec6efb1a07fa..556a217108d32ef35890da0463751afc688186f3 100644 (file)
@@ -131,29 +131,33 @@ fsck_err:
 void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c,
                         struct bkey_s_c k)
 {
-       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-       unsigned i, nr_data = s->nr_blocks - s->nr_redundant;
+       const struct bch_stripe *sp = bkey_s_c_to_stripe(k).v;
+       struct bch_stripe s = {};
+
+       memcpy(&s, sp, min(sizeof(s), bkey_val_bytes(k.k)));
+
+       unsigned nr_data = s.nr_blocks - s.nr_redundant;
+
+       prt_printf(out, "algo %u sectors %u blocks %u:%u csum ",
+                  s.algorithm,
+                  le16_to_cpu(s.sectors),
+                  nr_data,
+                  s.nr_redundant);
+       bch2_prt_csum_type(out, s.csum_type);
+       prt_printf(out, " gran %u", 1U << s.csum_granularity_bits);
+
+       for (unsigned i = 0; i < s.nr_blocks; i++) {
+               const struct bch_extent_ptr *ptr = sp->ptrs + i;
+
+               if ((void *) ptr >= bkey_val_end(k))
+                       break;
+
+               bch2_extent_ptr_to_text(out, c, ptr);
 
-       prt_printf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u",
-              s->algorithm,
-              le16_to_cpu(s->sectors),
-              nr_data,
-              s->nr_redundant,
-              s->csum_type,
-              1U << s->csum_granularity_bits);
-
-       for (i = 0; i < s->nr_blocks; i++) {
-               const struct bch_extent_ptr *ptr = s->ptrs + i;
-               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-               u32 offset;
-               u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
-               prt_printf(out, " %u:%llu:%u", ptr->dev, b, offset);
-               if (i < nr_data)
-                       prt_printf(out, "#%u", stripe_blockcount_get(s, i));
-               prt_printf(out, " gen %u", ptr->gen);
-               if (ptr_stale(ca, ptr))
-                       prt_printf(out, " stale");
+               if (s.csum_type < BCH_CSUM_NR &&
+                   i < nr_data &&
+                   stripe_blockcount_offset(&s, i) < bkey_val_bytes(k.k))
+                       prt_printf(out,  "#%u", stripe_blockcount_get(sp, i));
        }
 }
 
@@ -607,10 +611,8 @@ static void ec_validate_checksums(struct bch_fs *c, struct ec_stripe_buf *buf)
                                struct printbuf err = PRINTBUF;
                                struct bch_dev *ca = bch_dev_bkey_exists(c, v->ptrs[i].dev);
 
-                               prt_printf(&err, "stripe checksum error: expected %0llx:%0llx got %0llx:%0llx (type %s)\n",
-                                          want.hi, want.lo,
-                                          got.hi, got.lo,
-                                          bch2_csum_types[v->csum_type]);
+                               prt_str(&err, "stripe ");
+                               bch2_csum_err_msg(&err, v->csum_type, want, got);
                                prt_printf(&err, "  for %ps at %u of\n  ", (void *) _RET_IP_, i);
                                bch2_bkey_val_to_text(&err, c, bkey_i_to_s_c(&buf->key));
                                bch_err_ratelimited(ca, "%s", err.buf);
index f4369b02e805f0a24572a8cf87d18867c3d3301a..f042616888b0a1d47d7797e987c912c58d0945b3 100644 (file)
@@ -32,6 +32,8 @@ static inline unsigned stripe_csums_per_device(const struct bch_stripe *s)
 static inline unsigned stripe_csum_offset(const struct bch_stripe *s,
                                          unsigned dev, unsigned csum_idx)
 {
+       EBUG_ON(s->csum_type >= BCH_CSUM_NR);
+
        unsigned csum_bytes = bch_crc_bytes[s->csum_type];
 
        return sizeof(struct bch_stripe) +
index 0e3ca99fbd2de1522c5e8dea8ac313232f60f7f3..1a331e539204852d4db9e7620df0282abe262f1e 100644 (file)
@@ -998,7 +998,9 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc
                        prt_str(out, " cached");
                if (ptr->unwritten)
                        prt_str(out, " unwritten");
-               if (ca && ptr_stale(ca, ptr))
+               if (b >= ca->mi.first_bucket &&
+                   b <  ca->mi.nbuckets &&
+                   ptr_stale(ca, ptr))
                        prt_printf(out, " stale");
        }
 }
@@ -1028,11 +1030,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
                        struct bch_extent_crc_unpacked crc =
                                bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
 
-                       prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress ",
+                       prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
                               crc.compressed_size,
                               crc.uncompressed_size,
-                              crc.offset, crc.nonce,
-                              bch2_csum_types[crc.csum_type]);
+                              crc.offset, crc.nonce);
+                       bch2_prt_csum_type(out, crc.csum_type);
+                       prt_str(out, " compress ");
                        bch2_prt_compression_type(out, crc.compression_type);
                        break;
                }
index 4ce5e957a6e9162307d98b5b74b02338087f7e1e..0f955c3c76a7bcdce86556e4d09ba0e5cf4e7f9a 100644 (file)
@@ -115,7 +115,7 @@ static void swap_bytes(void *a, void *b, size_t n)
 
 struct wrapper {
        cmp_func_t cmp;
-       swap_func_t swap;
+       swap_func_t swap_func;
 };
 
 /*
@@ -125,7 +125,7 @@ struct wrapper {
 static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
 {
        if (swap_func == SWAP_WRAPPER) {
-               ((const struct wrapper *)priv)->swap(a, b, (int)size);
+               ((const struct wrapper *)priv)->swap_func(a, b, (int)size);
                return;
        }
 
@@ -174,7 +174,7 @@ void eytzinger0_sort_r(void *base, size_t n, size_t size,
        int i, c, r;
 
        /* called from 'sort' without swap function, let's pick the default */
-       if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
+       if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap_func)
                swap_func = NULL;
 
        if (!swap_func) {
@@ -227,7 +227,7 @@ void eytzinger0_sort(void *base, size_t n, size_t size,
 {
        struct wrapper w = {
                .cmp  = cmp_func,
-               .swap = swap_func,
+               .swap_func = swap_func,
        };
 
        return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
index ee0e2df33322d2dccb60e1ed90257863769ead0d..24840aee335c0ffeabd3ad69c79665cc005e28d8 100644 (file)
@@ -242,8 +242,8 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
             (_i) = eytzinger0_next((_i), (_size)))
 
 /* return greatest node <= @search, or -1 if not found */
-static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
-                                        cmp_func_t cmp, const void *search)
+static inline int eytzinger0_find_le(void *base, size_t nr, size_t size,
+                                    cmp_func_t cmp, const void *search)
 {
        unsigned i, n = 0;
 
@@ -256,18 +256,32 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
        } while (n < nr);
 
        if (n & 1) {
-               /* @i was greater than @search, return previous node: */
+               /*
+                * @i was greater than @search, return previous node:
+                *
+                * if @i was leftmost/smallest element,
+                * eytzinger0_prev(eytzinger0_first()) returns -1, as expected
+                */
                return eytzinger0_prev(i, nr);
        } else {
                return i;
        }
 }
 
-static inline ssize_t eytzinger0_find_gt(void *base, size_t nr, size_t size,
-                                        cmp_func_t cmp, const void *search)
+static inline int eytzinger0_find_gt(void *base, size_t nr, size_t size,
+                                    cmp_func_t cmp, const void *search)
 {
        ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
-       return eytzinger0_next(idx, size);
+
+       /*
+        * if eytzinger0_find_le() returned -1 - no element was <= search - we
+        * want to return the first element; next/prev identities mean this works
+        * as expected
+        *
+        * similarly if find_le() returns last element, we should return -1;
+        * identities mean this all works out:
+        */
+       return eytzinger0_next(idx, nr);
 }
 
 #define eytzinger0_find(base, nr, size, _cmp, search)                  \
index f49e6c0f0f6835968202ab2f1fa194933945554a..b889370a5088113a2417787bdbb4b98a16597063 100644 (file)
@@ -387,6 +387,8 @@ static __always_inline long bch2_dio_write_done(struct dio_write *dio)
        ret = dio->op.error ?: ((long) dio->written << 9);
        bio_put(&dio->op.wbio.bio);
 
+       bch2_write_ref_put(dio->op.c, BCH_WRITE_REF_dio_write);
+
        /* inode->i_dio_count is our ref on inode and thus bch_fs */
        inode_dio_end(&inode->v);
 
@@ -590,22 +592,25 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
        prefetch(&inode->ei_inode);
        prefetch((void *) &inode->ei_inode + 64);
 
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_dio_write))
+               return -EROFS;
+
        inode_lock(&inode->v);
 
        ret = generic_write_checks(req, iter);
        if (unlikely(ret <= 0))
-               goto err;
+               goto err_put_write_ref;
 
        ret = file_remove_privs(file);
        if (unlikely(ret))
-               goto err;
+               goto err_put_write_ref;
 
        ret = file_update_time(file);
        if (unlikely(ret))
-               goto err;
+               goto err_put_write_ref;
 
        if (unlikely((req->ki_pos|iter->count) & (block_bytes(c) - 1)))
-               goto err;
+               goto err_put_write_ref;
 
        inode_dio_begin(&inode->v);
        bch2_pagecache_block_get(inode);
@@ -645,7 +650,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
        }
 
        ret = bch2_dio_write_loop(dio);
-err:
+out:
        if (locked)
                inode_unlock(&inode->v);
        return ret;
@@ -653,7 +658,9 @@ err_put_bio:
        bch2_pagecache_block_put(inode);
        bio_put(bio);
        inode_dio_end(&inode->v);
-       goto err;
+err_put_write_ref:
+       bch2_write_ref_put(c, BCH_WRITE_REF_dio_write);
+       goto out;
 }
 
 void bch2_fs_fs_io_direct_exit(struct bch_fs *c)
index 8c70123b6a0c809b6d50040593281c2e9c115828..20b40477425f49449499b11d63930d92e10ed3ba 100644 (file)
@@ -174,18 +174,18 @@ void __bch2_i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode,
 static int bch2_flush_inode(struct bch_fs *c,
                            struct bch_inode_info *inode)
 {
-       struct bch_inode_unpacked u;
-       int ret;
-
        if (c->opts.journal_flush_disabled)
                return 0;
 
-       ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u);
-       if (ret)
-               return ret;
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_fsync))
+               return -EROFS;
 
-       return bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
-               bch2_inode_flush_nocow_writes(c, inode);
+       struct bch_inode_unpacked u;
+       int ret = bch2_inode_find_by_inum(c, inode_inum(inode), &u) ?:
+                 bch2_journal_flush_seq(&c->journal, u.bi_journal_seq) ?:
+                 bch2_inode_flush_nocow_writes(c, inode);
+       bch2_write_ref_put(c, BCH_WRITE_REF_fsync);
+       return ret;
 }
 
 int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
index 725fcf46f6312c267c2a7c05f1eaa6aed5fb83e7..9aa28b52ab926c567f49e0bb68b9c6791fb326e5 100644 (file)
@@ -247,7 +247,7 @@ static void journal_entry_err_msg(struct printbuf *out,
 
        if (entry) {
                prt_str(out, " type=");
-               prt_str(out, bch2_jset_entry_types[entry->type]);
+               bch2_prt_jset_entry_type(out, entry->type);
        }
 
        if (!jset) {
@@ -403,7 +403,8 @@ static void journal_entry_btree_keys_to_text(struct printbuf *out, struct bch_fs
        jset_entry_for_each_key(entry, k) {
                if (!first) {
                        prt_newline(out);
-                       prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+                       bch2_prt_jset_entry_type(out, entry->type);
+                       prt_str(out, ": ");
                }
                prt_printf(out, "btree=%s l=%u ", bch2_btree_id_str(entry->btree_id), entry->level);
                bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(k));
@@ -563,9 +564,9 @@ static void journal_entry_usage_to_text(struct printbuf *out, struct bch_fs *c,
        struct jset_entry_usage *u =
                container_of(entry, struct jset_entry_usage, entry);
 
-       prt_printf(out, "type=%s v=%llu",
-              bch2_fs_usage_types[u->entry.btree_id],
-              le64_to_cpu(u->v));
+       prt_str(out, "type=");
+       bch2_prt_fs_usage_type(out, u->entry.btree_id);
+       prt_printf(out, " v=%llu", le64_to_cpu(u->v));
 }
 
 static int journal_entry_data_usage_validate(struct bch_fs *c,
@@ -827,11 +828,11 @@ int bch2_journal_entry_validate(struct bch_fs *c,
 void bch2_journal_entry_to_text(struct printbuf *out, struct bch_fs *c,
                                struct jset_entry *entry)
 {
+       bch2_prt_jset_entry_type(out, entry->type);
+
        if (entry->type < BCH_JSET_ENTRY_NR) {
-               prt_printf(out, "%s: ", bch2_jset_entry_types[entry->type]);
+               prt_str(out, ": ");
                bch2_jset_entry_ops[entry->type].to_text(out, c, entry);
-       } else {
-               prt_printf(out, "(unknown type %u)", entry->type);
        }
 }
 
index ab811c0dad26accfb4924eaef4cccb3ab957087c..04a577848b015cd900a1a040ec0565ffb2f69811 100644 (file)
@@ -67,6 +67,8 @@ void bch2_journal_set_watermark(struct journal *j)
            track_event_change(&c->times[BCH_TIME_blocked_write_buffer_full], low_on_wb))
                trace_and_count(c, journal_full, c);
 
+       mod_bit(JOURNAL_SPACE_LOW, &j->flags, low_on_space || low_on_pin);
+
        swap(watermark, j->watermark);
        if (watermark > j->watermark)
                journal_wake(j);
index 8c053cb64ca5ee25b9a5b2613f2fcd9e03d517d3..b5161b5d76a00874ed9ed88a0969927f2cfc9dbe 100644 (file)
@@ -134,6 +134,7 @@ enum journal_flags {
        JOURNAL_STARTED,
        JOURNAL_MAY_SKIP_FLUSH,
        JOURNAL_NEED_FLUSH_WRITE,
+       JOURNAL_SPACE_LOW,
 };
 
 /* Reasons we may fail to get a journal reservation: */
index e1800c4119b5fbaf8ebbfcdaef996e1dd9c35ca8..bb068fd724656cf8307d14022ca537f918b65747 100644 (file)
@@ -43,7 +43,7 @@ const char * const __bch2_btree_ids[] = {
        NULL
 };
 
-const char * const bch2_csum_types[] = {
+static const char * const __bch2_csum_types[] = {
        BCH_CSUM_TYPES()
        NULL
 };
@@ -53,7 +53,7 @@ const char * const bch2_csum_opts[] = {
        NULL
 };
 
-const char * const __bch2_compression_types[] = {
+static const char * const __bch2_compression_types[] = {
        BCH_COMPRESSION_TYPES()
        NULL
 };
@@ -83,18 +83,39 @@ const char * const bch2_member_states[] = {
        NULL
 };
 
-const char * const bch2_jset_entry_types[] = {
+static const char * const __bch2_jset_entry_types[] = {
        BCH_JSET_ENTRY_TYPES()
        NULL
 };
 
-const char * const bch2_fs_usage_types[] = {
+static const char * const __bch2_fs_usage_types[] = {
        BCH_FS_USAGE_TYPES()
        NULL
 };
 
 #undef x
 
+static void prt_str_opt_boundscheck(struct printbuf *out, const char * const opts[],
+                                   unsigned nr, const char *type, unsigned idx)
+{
+       if (idx < nr)
+               prt_str(out, opts[idx]);
+       else
+               prt_printf(out, "(unknown %s %u)", type, idx);
+}
+
+#define PRT_STR_OPT_BOUNDSCHECKED(name, type)                                  \
+void bch2_prt_##name(struct printbuf *out, type t)                             \
+{                                                                              \
+       prt_str_opt_boundscheck(out, __bch2_##name##s, ARRAY_SIZE(__bch2_##name##s) - 1, #name, t);\
+}
+
+PRT_STR_OPT_BOUNDSCHECKED(jset_entry_type,     enum bch_jset_entry_type);
+PRT_STR_OPT_BOUNDSCHECKED(fs_usage_type,       enum bch_fs_usage_type);
+PRT_STR_OPT_BOUNDSCHECKED(data_type,           enum bch_data_type);
+PRT_STR_OPT_BOUNDSCHECKED(csum_type,           enum bch_csum_type);
+PRT_STR_OPT_BOUNDSCHECKED(compression_type,    enum bch_compression_type);
+
 static int bch2_opt_fix_errors_parse(struct bch_fs *c, const char *val, u64 *res,
                                     struct printbuf *err)
 {
index 1ac4135cca1c3dccc71a75a0d062ee30df33111c..84e452835a17d84d36c4d0f3906501578bf702d3 100644 (file)
@@ -16,18 +16,20 @@ extern const char * const bch2_version_upgrade_opts[];
 extern const char * const bch2_sb_features[];
 extern const char * const bch2_sb_compat[];
 extern const char * const __bch2_btree_ids[];
-extern const char * const bch2_csum_types[];
 extern const char * const bch2_csum_opts[];
-extern const char * const __bch2_compression_types[];
 extern const char * const bch2_compression_opts[];
 extern const char * const bch2_str_hash_types[];
 extern const char * const bch2_str_hash_opts[];
 extern const char * const __bch2_data_types[];
 extern const char * const bch2_member_states[];
-extern const char * const bch2_jset_entry_types[];
-extern const char * const bch2_fs_usage_types[];
 extern const char * const bch2_d_types[];
 
+void bch2_prt_jset_entry_type(struct printbuf *,       enum bch_jset_entry_type);
+void bch2_prt_fs_usage_type(struct printbuf *,         enum bch_fs_usage_type);
+void bch2_prt_data_type(struct printbuf *,             enum bch_data_type);
+void bch2_prt_csum_type(struct printbuf *,             enum bch_csum_type);
+void bch2_prt_compression_type(struct printbuf *,      enum bch_compression_type);
+
 static inline const char *bch2_d_type_str(unsigned d_type)
 {
        return (d_type < BCH_DT_MAX ? bch2_d_types[d_type] : NULL) ?: "(bad d_type)";
index b76c16152579c6d3e5a51dbf54c839392c0ce0b2..0f328aba9760ba0e89fd015ee757239b6d8bd8c4 100644 (file)
@@ -47,20 +47,6 @@ void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
        }
 }
 
-static bool btree_id_is_alloc(enum btree_id id)
-{
-       switch (id) {
-       case BTREE_ID_alloc:
-       case BTREE_ID_backpointers:
-       case BTREE_ID_need_discard:
-       case BTREE_ID_freespace:
-       case BTREE_ID_bucket_gens:
-               return true;
-       default:
-               return false;
-       }
-}
-
 /* for -o reconstruct_alloc: */
 static void bch2_reconstruct_alloc(struct bch_fs *c)
 {
index cb501460d6152b31a4ae57d9dea6db0792c47c0f..0cec0f7d9703520a3cf24bcc2ca2ce7f86285ebc 100644 (file)
@@ -44,7 +44,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c)
 
        set_bit(BCH_FS_may_go_rw, &c->flags);
 
-       if (keys->nr || c->opts.fsck || !c->sb.clean)
+       if (keys->nr || c->opts.fsck || !c->sb.clean || c->recovery_passes_explicit)
                return bch2_fs_read_write_early(c);
        return 0;
 }
index d6f81179c3a29b6e884f92c94628d512626c6b45..a98ef940b7a3280bd0da0474ef4f387fdcd0cc18 100644 (file)
          BCH_FSCK_ERR_subvol_fs_path_parent_wrong)             \
        x(btree_subvolume_children,                             \
          BIT_ULL(BCH_RECOVERY_PASS_check_subvols),             \
-         BCH_FSCK_ERR_subvol_children_not_set)
+         BCH_FSCK_ERR_subvol_children_not_set)                 \
+       x(mi_btree_bitmap,                                      \
+         BIT_ULL(BCH_RECOVERY_PASS_check_allocations),         \
+         BCH_FSCK_ERR_btree_bitmap_not_marked)
 
 #define DOWNGRADE_TABLE()
 
index d7d609131030a817c5fa2867fc3cee5796fb898c..4ca6e7b0d8aaed2c4b95fff82c2ed964c6a102ad 100644 (file)
        x(bucket_gens_nonzero_for_invalid_buckets,              122)    \
        x(need_discard_freespace_key_to_invalid_dev_bucket,     123)    \
        x(need_discard_freespace_key_bad,                       124)    \
-       x(backpointer_pos_wrong,                                125)    \
+       x(backpointer_bucket_offset_wrong,                      125)    \
        x(backpointer_to_missing_device,                        126)    \
        x(backpointer_to_missing_alloc,                         127)    \
        x(backpointer_to_missing_ptr,                           128)    \
        x(btree_ptr_v2_min_key_bad,                             262)    \
        x(btree_root_unreadable_and_scan_found_nothing,         263)    \
        x(snapshot_node_missing,                                264)    \
-       x(dup_backpointer_to_bad_csum_extent,                   265)
+       x(dup_backpointer_to_bad_csum_extent,                   265)    \
+       x(btree_bitmap_not_marked,                              266)
 
 enum bch_sb_error_id {
 #define x(t, n) BCH_FSCK_ERR_##t = n,
index eff5ce18c69c0600047c1fef688a5980af33c678..522a969345e5289ac87cf53b5f5a735e3b5f8d67 100644 (file)
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "btree_cache.h"
 #include "disk_groups.h"
 #include "opts.h"
 #include "replicas.h"
@@ -426,3 +427,55 @@ void bch2_dev_errors_reset(struct bch_dev *ca)
        bch2_write_super(c);
        mutex_unlock(&c->sb_lock);
 }
+
+/*
+ * Per member "range has btree nodes" bitmap:
+ *
+ * This is so that if we ever have to run the btree node scan to repair we don't
+ * have to scan full devices:
+ */
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
+{
+       bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) /* test each pointer of @k against its own device */
+               if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev),
+                                                         ptr->offset, btree_sectors(c)))
+                       return false; /* this pointer's btree_sectors(c) range is not fully marked */
+       return true; /* no pointer failed the check */
+}
+
+static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
+                               u64 start, unsigned sectors)
+{
+       struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
+       u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
+
+       u64 end = start + sectors;
+
+       int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
+       if (resize > 0) {
+               u64 new_bitmap = 0;
+
+               for (unsigned i = 0; i < 64; i++)
+                       if (bitmap & BIT_ULL(i))
+                               new_bitmap |= BIT_ULL(i >> resize);
+               bitmap = new_bitmap;
+               m->btree_bitmap_shift += resize;
+       }
+
+       for (unsigned bit = sectors >> m->btree_bitmap_shift;
+            bit << m->btree_bitmap_shift < end;
+            bit++)
+               bitmap |= BIT_ULL(bit);
+
+       m->btree_allocated_bitmap = cpu_to_le64(bitmap);
+}
+
+void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
+{
+       lockdep_assert_held(&c->sb_lock);
+
+       struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
+       bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
+               __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
+}
index be0a941832715a32634b8c3dea60bbf1685a672f..b27c3e4467cf288d67587143e5343d57d5aa41c9 100644 (file)
@@ -3,6 +3,7 @@
 #define _BCACHEFS_SB_MEMBERS_H
 
 #include "darray.h"
+#include "bkey_types.h"
 
 extern char * const bch2_member_error_strs[];
 
@@ -220,6 +221,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
                        : 1,
                .freespace_initialized = BCH_MEMBER_FREESPACE_INITIALIZED(mi),
                .valid          = bch2_member_exists(mi),
+               .btree_bitmap_shift     = mi->btree_bitmap_shift,
+               .btree_allocated_bitmap = le64_to_cpu(mi->btree_allocated_bitmap),
        };
 }
 
@@ -228,4 +231,22 @@ void bch2_sb_members_from_cpu(struct bch_fs *);
 void bch2_dev_io_errors_to_text(struct printbuf *, struct bch_dev *);
 void bch2_dev_errors_reset(struct bch_dev *);
 
+static inline bool bch2_dev_btree_bitmap_marked_sectors(struct bch_dev *ca, u64 start, unsigned sectors)
+{
+       u64 end = start + sectors;
+
+       if (end > 64 << ca->mi.btree_bitmap_shift)
+               return false;
+
+       for (unsigned bit = sectors >> ca->mi.btree_bitmap_shift;
+            bit << ca->mi.btree_bitmap_shift < end;
+            bit++)
+               if (!(ca->mi.btree_allocated_bitmap & BIT_ULL(bit)))
+                       return false;
+       return true;
+}
+
+bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
+void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
+
 #endif /* _BCACHEFS_SB_MEMBERS_H */
index 0e806f04f3d7c5117ade3d612b1c851da243aead..544322d5c2517070143d367fa15d4ff353642556 100644 (file)
@@ -125,6 +125,15 @@ static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ances
        return s->parent;
 }
 
+static bool test_ancestor_bitmap(struct snapshot_table *t, u32 id, u32 ancestor)
+{
+       const struct snapshot_t *s = __snapshot_t(t, id);
+       if (!s) /* no table entry for @id: answer "not an ancestor" rather than dereference NULL */
+               return false;
+       /* Bit (ancestor - id - 1) of @id's is_ancestor bitmap holds the answer for @ancestor. */
+       return test_bit(ancestor - id - 1, s->is_ancestor);
+}
+
 bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
 {
        bool ret;
@@ -140,13 +149,11 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
        while (id && id < ancestor - IS_ANCESTOR_BITMAP)
                id = get_ancestor_below(t, id, ancestor);
 
-       if (id && id < ancestor) {
-               ret = test_bit(ancestor - id - 1, __snapshot_t(t, id)->is_ancestor);
+       ret = id && id < ancestor
+               ? test_ancestor_bitmap(t, id, ancestor)
+               : id == ancestor;
 
-               EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
-       } else {
-               ret = id == ancestor;
-       }
+       EBUG_ON(ret != __bch2_snapshot_is_ancestor_early(t, id, ancestor));
 out:
        rcu_read_unlock();
 
index 5eee055ee2721a3967fb31ca38adcbc5672521d6..08ea3dbbbe97ce11833fe79baa5fd87935919339 100644 (file)
@@ -700,8 +700,11 @@ retry:
                return -ENOMEM;
 
        sb->sb_name = kstrdup(path, GFP_KERNEL);
-       if (!sb->sb_name)
-               return -ENOMEM;
+       if (!sb->sb_name) {
+               ret = -ENOMEM;
+               prt_printf(&err, "error allocating memory for sb_name");
+               goto err;
+       }
 
 #ifndef __KERNEL__
        if (opt_get(*opts, direct_io) == false)
index ed63018f21bef58b2aa854f9c3f05ad1b3f26202..8daf80a38d60c6e4fa97b97345d3d4ecb80e7e88 100644 (file)
@@ -288,8 +288,13 @@ static void __bch2_fs_read_only(struct bch_fs *c)
        if (test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags) &&
            !test_bit(BCH_FS_emergency_ro, &c->flags))
                set_bit(BCH_FS_clean_shutdown, &c->flags);
+
        bch2_fs_journal_stop(&c->journal);
 
+       bch_info(c, "%sshutdown complete, journal seq %llu",
+                test_bit(BCH_FS_clean_shutdown, &c->flags) ? "" : "un",
+                c->journal.seq_ondisk);
+
        /*
         * After stopping journal:
         */
index ec784d975f6655a378207692644975e53271ddca..11bcef170c2c22644108e9fbec9b24eaf478059c 100644 (file)
@@ -37,6 +37,8 @@ struct bch_member_cpu {
        u8                      durability;
        u8                      freespace_initialized;
        u8                      valid;
+       u8                      btree_bitmap_shift;
+       u64                     btree_allocated_bitmap;
 };
 
 #endif /* _BCACHEFS_SUPER_TYPES_H */
index c86a93a8d8fc81bbe373efcbec74f3e2563e6da5..5be92fe3f4ea4e115512f0b7a31482919406a507 100644 (file)
@@ -17,7 +17,6 @@
 #include "btree_iter.h"
 #include "btree_key_cache.h"
 #include "btree_update.h"
-#include "btree_update_interior.h"
 #include "btree_gc.h"
 #include "buckets.h"
 #include "clock.h"
@@ -26,6 +25,7 @@
 #include "ec.h"
 #include "inode.h"
 #include "journal.h"
+#include "journal_reclaim.h"
 #include "keylist.h"
 #include "move.h"
 #include "movinggc.h"
@@ -139,6 +139,7 @@ do {                                                                        \
 write_attribute(trigger_gc);
 write_attribute(trigger_discards);
 write_attribute(trigger_invalidates);
+write_attribute(trigger_journal_flush);
 write_attribute(prune_cache);
 write_attribute(btree_wakeup);
 rw_attribute(btree_gc_periodic);
@@ -166,7 +167,6 @@ read_attribute(btree_write_stats);
 read_attribute(btree_cache_size);
 read_attribute(compression_stats);
 read_attribute(journal_debug);
-read_attribute(btree_updates);
 read_attribute(btree_cache);
 read_attribute(btree_key_cache);
 read_attribute(stripes_heap);
@@ -415,9 +415,6 @@ SHOW(bch2_fs)
        if (attr == &sysfs_journal_debug)
                bch2_journal_debug_to_text(out, &c->journal);
 
-       if (attr == &sysfs_btree_updates)
-               bch2_btree_updates_to_text(out, c);
-
        if (attr == &sysfs_btree_cache)
                bch2_btree_cache_to_text(out, c);
 
@@ -505,7 +502,7 @@ STORE(bch2_fs)
 
        /* Debugging: */
 
-       if (!test_bit(BCH_FS_rw, &c->flags))
+       if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
                return -EROFS;
 
        if (attr == &sysfs_prune_cache) {
@@ -538,6 +535,11 @@ STORE(bch2_fs)
        if (attr == &sysfs_trigger_invalidates)
                bch2_do_invalidates(c);
 
+       if (attr == &sysfs_trigger_journal_flush) {
+               bch2_journal_flush_all_pins(&c->journal);
+               bch2_journal_meta(&c->journal);
+       }
+
 #ifdef CONFIG_BCACHEFS_TESTS
        if (attr == &sysfs_perf_test) {
                char *tmp = kstrdup(buf, GFP_KERNEL), *p = tmp;
@@ -558,6 +560,7 @@ STORE(bch2_fs)
                        size = ret;
        }
 #endif
+       bch2_write_ref_put(c, BCH_WRITE_REF_sysfs);
        return size;
 }
 SYSFS_OPS(bch2_fs);
@@ -639,7 +642,6 @@ SYSFS_OPS(bch2_fs_internal);
 struct attribute *bch2_fs_internal_files[] = {
        &sysfs_flags,
        &sysfs_journal_debug,
-       &sysfs_btree_updates,
        &sysfs_btree_cache,
        &sysfs_btree_key_cache,
        &sysfs_new_stripes,
@@ -657,6 +659,7 @@ struct attribute *bch2_fs_internal_files[] = {
        &sysfs_trigger_gc,
        &sysfs_trigger_discards,
        &sysfs_trigger_invalidates,
+       &sysfs_trigger_journal_flush,
        &sysfs_prune_cache,
        &sysfs_btree_wakeup,
 
index b3fe9fc577470ff14659df531959c9e7aa6c324b..bfec656f94c0758ee081ea7d36fe1e272baca810 100644 (file)
@@ -672,7 +672,7 @@ static int __do_delete(struct btree_trans *trans, struct bpos pos)
 
        bch2_trans_iter_init(trans, &iter, BTREE_ID_xattrs, pos,
                             BTREE_ITER_INTENT);
-       k = bch2_btree_iter_peek(&iter);
+       k = bch2_btree_iter_peek_upto(&iter, POS(0, U64_MAX));
        ret = bkey_err(k);
        if (ret)
                goto err;
index b7e7c29278fc052a90fe7c029e8fd0626c48ddc5..5cf885b09986ac95effa15f7a37fc78bd56323cb 100644 (file)
@@ -788,6 +788,14 @@ static inline int copy_from_user_errcode(void *to, const void __user *from, unsi
 
 #endif
 
+static inline void mod_bit(long nr, volatile unsigned long *addr, bool v)
+{
+       if (v) /* v true: set bit @nr at @addr */
+               set_bit(nr, addr);
+       else /* v false: clear bit @nr at @addr */
+               clear_bit(nr, addr);
+}
+
 static inline void __set_bit_le64(size_t bit, __le64 *addr)
 {
        addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
@@ -795,7 +803,7 @@ static inline void __set_bit_le64(size_t bit, __le64 *addr)
 
 static inline void __clear_bit_le64(size_t bit, __le64 *addr)
 {
-       addr[bit / 64] &= !cpu_to_le64(BIT_ULL(bit % 64));
+       addr[bit / 64] &= ~cpu_to_le64(BIT_ULL(bit % 64));
 }
 
 static inline bool test_bit_le64(size_t bit, __le64 *addr)
index dd6f566a383f00e83c9f36125ee4ecd3fc0e3541..121ab890bd0557e4779bd25d00dc422ba8fb1b3f 100644 (file)
@@ -1133,6 +1133,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
+       ret = btrfs_record_root_in_trans(trans, node->root);
+       if (ret)
+               return ret;
        ret = btrfs_update_delayed_inode(trans, node->root, path, node);
        return ret;
 }
index beedd6ed64d39bd7f53ad814c22df36b80b96235..257d044bca9158c95e205ff22ff0d662d0d7f074 100644 (file)
@@ -3464,6 +3464,14 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
        if (root_id != BTRFS_TREE_LOG_OBJECTID) {
                struct btrfs_ref generic_ref = { 0 };
 
+               /*
+                * Assert that the extent buffer is not cleared due to
+                * EXTENT_BUFFER_ZONED_ZEROOUT. Please refer to
+                * btrfs_clear_buffer_dirty() and btree_csum_one_bio() for
+                * details.
+                */
+               ASSERT(btrfs_header_bytenr(buf) != 0);
+
                btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF,
                                       buf->start, buf->len, parent,
                                       btrfs_header_owner(buf));
index 61594eaf1f8969fc3ba04604e3470a8932450767..2776112dbdf8d471a7cb4d515fdd443e6fadbac5 100644 (file)
@@ -681,31 +681,21 @@ static void end_bbio_data_read(struct btrfs_bio *bbio)
 int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
                           gfp_t extra_gfp)
 {
+       const gfp_t gfp = GFP_NOFS | extra_gfp;
        unsigned int allocated;
 
        for (allocated = 0; allocated < nr_pages;) {
                unsigned int last = allocated;
 
-               allocated = alloc_pages_bulk_array(GFP_NOFS | extra_gfp,
-                                                  nr_pages, page_array);
-
-               if (allocated == nr_pages)
-                       return 0;
-
-               /*
-                * During this iteration, no page could be allocated, even
-                * though alloc_pages_bulk_array() falls back to alloc_page()
-                * if  it could not bulk-allocate. So we must be out of memory.
-                */
-               if (allocated == last) {
+               allocated = alloc_pages_bulk_array(gfp, nr_pages, page_array);
+               if (unlikely(allocated == last)) {
+                       /* No progress, fail and do cleanup. */
                        for (int i = 0; i < allocated; i++) {
                                __free_page(page_array[i]);
                                page_array[i] = NULL;
                        }
                        return -ENOMEM;
                }
-
-               memalloc_retry_wait(GFP_NOFS);
        }
        return 0;
 }
@@ -4154,7 +4144,7 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
         * The actual zeroout of the buffer will happen later in
         * btree_csum_one_bio.
         */
-       if (btrfs_is_zoned(fs_info)) {
+       if (btrfs_is_zoned(fs_info) && test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
                set_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags);
                return;
        }
@@ -4193,6 +4183,7 @@ void set_extent_buffer_dirty(struct extent_buffer *eb)
        num_folios = num_extent_folios(eb);
        WARN_ON(atomic_read(&eb->refs) == 0);
        WARN_ON(!test_bit(EXTENT_BUFFER_TREE_REF, &eb->bflags));
+       WARN_ON(test_bit(EXTENT_BUFFER_ZONED_ZEROOUT, &eb->bflags));
 
        if (!was_dirty) {
                bool subpage = eb->fs_info->nodesize < PAGE_SIZE;
index 37701531eeb1ba486cd8117f104794083dff8816..c65fe5de40220d3b51003bb73b3e6414eaefba08 100644 (file)
@@ -2533,7 +2533,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
                 */
                if (bits & EXTENT_CLEAR_META_RESV &&
                    root != fs_info->tree_root)
-                       btrfs_delalloc_release_metadata(inode, len, false);
+                       btrfs_delalloc_release_metadata(inode, len, true);
 
                /* For sanity tests. */
                if (btrfs_is_testing(fs_info))
@@ -4503,6 +4503,7 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
        struct btrfs_trans_handle *trans;
        struct btrfs_block_rsv block_rsv;
        u64 root_flags;
+       u64 qgroup_reserved = 0;
        int ret;
 
        down_write(&fs_info->subvol_sem);
@@ -4547,12 +4548,20 @@ int btrfs_delete_subvolume(struct btrfs_inode *dir, struct dentry *dentry)
        ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
        if (ret)
                goto out_undead;
+       qgroup_reserved = block_rsv.qgroup_rsv_reserved;
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                goto out_release;
        }
+       ret = btrfs_record_root_in_trans(trans, root);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               goto out_end_trans;
+       }
+       btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+       qgroup_reserved = 0;
        trans->block_rsv = &block_rsv;
        trans->bytes_reserved = block_rsv.size;
 
@@ -4611,7 +4620,9 @@ out_end_trans:
        ret = btrfs_end_transaction(trans);
        inode->i_flags |= S_DEAD;
 out_release:
-       btrfs_subvolume_release_metadata(root, &block_rsv);
+       btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+       if (qgroup_reserved)
+               btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
 out_undead:
        if (ret) {
                spin_lock(&dest->root_item_lock);
index 294e31edec9d3bbe566e9234c8ef76d73612adbc..55f3ba6a831ca194e2d8405dbf7caa60fbd81dfc 100644 (file)
@@ -613,6 +613,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
        int ret;
        dev_t anon_dev;
        u64 objectid;
+       u64 qgroup_reserved = 0;
 
        root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
        if (!root_item)
@@ -650,13 +651,18 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
                                               trans_num_items, false);
        if (ret)
                goto out_new_inode_args;
+       qgroup_reserved = block_rsv.qgroup_rsv_reserved;
 
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               btrfs_subvolume_release_metadata(root, &block_rsv);
-               goto out_new_inode_args;
+               goto out_release_rsv;
        }
+       ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+       if (ret)
+               goto out;
+       btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+       qgroup_reserved = 0;
        trans->block_rsv = &block_rsv;
        trans->bytes_reserved = block_rsv.size;
        /* Tree log can't currently deal with an inode which is a new root. */
@@ -767,9 +773,11 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
 out:
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
-       btrfs_subvolume_release_metadata(root, &block_rsv);
-
        btrfs_end_transaction(trans);
+out_release_rsv:
+       btrfs_block_rsv_release(fs_info, &block_rsv, (u64)-1, NULL);
+       if (qgroup_reserved)
+               btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
 out_new_inode_args:
        btrfs_new_inode_args_destroy(&new_inode_args);
 out_inode:
@@ -791,6 +799,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        struct btrfs_pending_snapshot *pending_snapshot;
        unsigned int trans_num_items;
        struct btrfs_trans_handle *trans;
+       struct btrfs_block_rsv *block_rsv;
+       u64 qgroup_reserved = 0;
        int ret;
 
        /* We do not support snapshotting right now. */
@@ -827,19 +837,19 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                goto free_pending;
        }
 
-       btrfs_init_block_rsv(&pending_snapshot->block_rsv,
-                            BTRFS_BLOCK_RSV_TEMP);
+       block_rsv = &pending_snapshot->block_rsv;
+       btrfs_init_block_rsv(block_rsv, BTRFS_BLOCK_RSV_TEMP);
        /*
         * 1 to add dir item
         * 1 to add dir index
         * 1 to update parent inode item
         */
        trans_num_items = create_subvol_num_items(inherit) + 3;
-       ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
-                                              &pending_snapshot->block_rsv,
+       ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, block_rsv,
                                               trans_num_items, false);
        if (ret)
                goto free_pending;
+       qgroup_reserved = block_rsv->qgroup_rsv_reserved;
 
        pending_snapshot->dentry = dentry;
        pending_snapshot->root = root;
@@ -852,6 +862,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                ret = PTR_ERR(trans);
                goto fail;
        }
+       ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
+       if (ret) {
+               btrfs_end_transaction(trans);
+               goto fail;
+       }
+       btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+       qgroup_reserved = 0;
 
        trans->pending_snapshot = pending_snapshot;
 
@@ -881,7 +898,9 @@ fail:
        if (ret && pending_snapshot->snap)
                pending_snapshot->snap->anon_dev = 0;
        btrfs_put_root(pending_snapshot->snap);
-       btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
+       btrfs_block_rsv_release(fs_info, block_rsv, (u64)-1, NULL);
+       if (qgroup_reserved)
+               btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
 free_pending:
        if (pending_snapshot->anon_dev)
                free_anon_bdev(pending_snapshot->anon_dev);
index 5f90f0605b12f7126e93d69e7fbec42720301fad..cf8820ce7aa2979920c6daafc1071c26571ecee6 100644 (file)
@@ -4495,6 +4495,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
                                      BTRFS_QGROUP_RSV_META_PREALLOC);
        trace_qgroup_meta_convert(root, num_bytes);
        qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
+       if (!sb_rdonly(fs_info->sb))
+               add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
 }
 
 /*
index 4bb538a372ce56404de84d6ddbca7fb951715949..7007f9e0c97282bc5f415f56d14e02e79895aafc 100644 (file)
@@ -548,13 +548,3 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
        }
        return ret;
 }
-
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
-                                     struct btrfs_block_rsv *rsv)
-{
-       struct btrfs_fs_info *fs_info = root->fs_info;
-       u64 qgroup_to_release;
-
-       btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release);
-       btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release);
-}
index 6f929cf3bd4967560964659ee9f631e6766a07ab..8f5739e732b9b6c9cc1d47ee34e20d50a403d90c 100644 (file)
@@ -18,8 +18,6 @@ struct btrfs_trans_handle;
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                     struct btrfs_block_rsv *rsv,
                                     int nitems, bool use_global_rsv);
-void btrfs_subvolume_release_metadata(struct btrfs_root *root,
-                                     struct btrfs_block_rsv *rsv);
 int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
                       u64 ref_id, u64 dirid, u64 sequence,
                       const struct fscrypt_str *name);
index 46e8426adf4f15768507303430b38c9e6be56c7d..85f359e0e0a7f2ea078157c85a1f78b0ea2bcadd 100644 (file)
@@ -745,14 +745,6 @@ again:
                h->reloc_reserved = reloc_reserved;
        }
 
-       /*
-        * Now that we have found a transaction to be a part of, convert the
-        * qgroup reservation from prealloc to pertrans. A different transaction
-        * can't race in and free our pertrans out from under us.
-        */
-       if (qgroup_reserved)
-               btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
-
 got_it:
        if (!current->journal_info)
                current->journal_info = h;
@@ -786,8 +778,15 @@ got_it:
                 * not just freed.
                 */
                btrfs_end_transaction(h);
-               return ERR_PTR(ret);
+               goto reserve_fail;
        }
+       /*
+        * Now that we have found a transaction to be a part of, convert the
+        * qgroup reservation from prealloc to pertrans. A different transaction
+        * can't race in and free our pertrans out from under us.
+        */
+       if (qgroup_reserved)
+               btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
 
        return h;
 
@@ -1495,6 +1494,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
                        radix_tree_tag_clear(&fs_info->fs_roots_radix,
                                        (unsigned long)root->root_key.objectid,
                                        BTRFS_ROOT_TRANS_TAG);
+                       btrfs_qgroup_free_meta_all_pertrans(root);
                        spin_unlock(&fs_info->fs_roots_radix_lock);
 
                        btrfs_free_log(trans, root);
@@ -1519,7 +1519,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
                        if (ret2)
                                return ret2;
                        spin_lock(&fs_info->fs_roots_radix_lock);
-                       btrfs_qgroup_free_meta_all_pertrans(root);
                }
        }
        spin_unlock(&fs_info->fs_roots_radix_lock);
index 1340d77124ae4db09c3b96548acdf1cd8a6c3fb0..ee9caf7916fb95931e08e41467cc97ddba950c0b 100644 (file)
@@ -795,8 +795,10 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
        ihold(inode);
 
        if (wbc->sync_mode == WB_SYNC_NONE &&
-           ceph_inode_to_fs_client(inode)->write_congested)
+           ceph_inode_to_fs_client(inode)->write_congested) {
+               redirty_page_for_writepage(wbc, page);
                return AOP_WRITEPAGE_ACTIVATE;
+       }
 
        wait_on_page_fscache(page);
 
index 55051ad09c19197e9b12d5d17068d20b04d6d3e6..c4941ba245ac3d0d3ae4e0f2598838b4ceb69ca9 100644 (file)
@@ -4783,13 +4783,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
 
                        doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
                              ceph_vinop(inode));
-                       spin_lock(&mdsc->cap_unlink_delay_lock);
+                       spin_lock(&mdsc->cap_delay_lock);
                        ci->i_ceph_flags |= CEPH_I_FLUSH;
                        if (!list_empty(&ci->i_cap_delay_list))
                                list_del_init(&ci->i_cap_delay_list);
                        list_add_tail(&ci->i_cap_delay_list,
                                      &mdsc->cap_unlink_delay_list);
-                       spin_unlock(&mdsc->cap_unlink_delay_lock);
+                       spin_unlock(&mdsc->cap_delay_lock);
 
                        /*
                         * Fire the work immediately, because the MDS maybe
index 3ab9c268a8bb398b779cc93d3da98f3d13df8fe3..360b686c3c67cfd1f256c656642957f6ca278427 100644 (file)
@@ -2504,7 +2504,7 @@ static void ceph_cap_unlink_work(struct work_struct *work)
        struct ceph_client *cl = mdsc->fsc->client;
 
        doutc(cl, "begin\n");
-       spin_lock(&mdsc->cap_unlink_delay_lock);
+       spin_lock(&mdsc->cap_delay_lock);
        while (!list_empty(&mdsc->cap_unlink_delay_list)) {
                struct ceph_inode_info *ci;
                struct inode *inode;
@@ -2516,15 +2516,15 @@ static void ceph_cap_unlink_work(struct work_struct *work)
 
                inode = igrab(&ci->netfs.inode);
                if (inode) {
-                       spin_unlock(&mdsc->cap_unlink_delay_lock);
+                       spin_unlock(&mdsc->cap_delay_lock);
                        doutc(cl, "on %p %llx.%llx\n", inode,
                              ceph_vinop(inode));
                        ceph_check_caps(ci, CHECK_CAPS_FLUSH);
                        iput(inode);
-                       spin_lock(&mdsc->cap_unlink_delay_lock);
+                       spin_lock(&mdsc->cap_delay_lock);
                }
        }
-       spin_unlock(&mdsc->cap_unlink_delay_lock);
+       spin_unlock(&mdsc->cap_delay_lock);
        doutc(cl, "done\n");
 }
 
@@ -5404,7 +5404,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
        INIT_LIST_HEAD(&mdsc->cap_wait_list);
        spin_lock_init(&mdsc->cap_delay_lock);
        INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
-       spin_lock_init(&mdsc->cap_unlink_delay_lock);
        INIT_LIST_HEAD(&mdsc->snap_flush_list);
        spin_lock_init(&mdsc->snap_flush_lock);
        mdsc->last_cap_flush_tid = 1;
index 03f8ff00874f727adff8b88cc8d538fc989692d8..b88e804152241281e5d1cd5ca90057d9deff9240 100644 (file)
@@ -461,9 +461,8 @@ struct ceph_mds_client {
        struct delayed_work    delayed_work;  /* delayed work */
        unsigned long    last_renew_caps;  /* last time we renewed our caps */
        struct list_head cap_delay_list;   /* caps with delayed release */
-       spinlock_t       cap_delay_lock;   /* protects cap_delay_list */
        struct list_head cap_unlink_delay_list;  /* caps with delayed release for unlink */
-       spinlock_t       cap_unlink_delay_lock;  /* protects cap_unlink_delay_list */
+       spinlock_t       cap_delay_lock;   /* protects cap_delay_list and cap_unlink_delay_list */
        struct list_head snap_flush_list;  /* cap_snaps ready to flush */
        spinlock_t       snap_flush_lock;
 
index e9df2f87072c687073abe9625e66886934497a02..8502ef68459b9842d090a4ac338591778d1b3b24 100644 (file)
@@ -636,11 +636,18 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
         * each file a separate locking class.  Let's differentiate on
         * whether the file has mmap or not for now.
         *
-        * Both paths of the branch look the same.  They're supposed to
+        * For similar reasons, writable and readonly files are given different
+        * lockdep keys, because the writable file /sys/power/resume may call vfs
+        * lookup helpers for arbitrary paths and readonly files can be read by
+        * overlayfs from vfs helpers when sysfs is a lower layer of overlayfs.
+        *
+        * All three cases look the same.  They're supposed to
         * look that way and give @of->mutex different static lockdep keys.
         */
        if (has_mmap)
                mutex_init(&of->mutex);
+       else if (file->f_mode & FMODE_WRITE)
+               mutex_init(&of->mutex);
        else
                mutex_init(&of->mutex);
 
index fac938f563ad022ce79cdc5f67321bc7f529cc1c..1955481832e03796170ea8f80361bc25cc452ca6 100644 (file)
@@ -3490,11 +3490,13 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
                    struct dentry *dentry, const u32 *bmval,
                    int ignore_crossmnt)
 {
+       DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
        struct nfsd4_fattr_args args;
        struct svc_fh *tempfh = NULL;
        int starting_len = xdr->buf->len;
        __be32 *attrlen_p, status;
        int attrlen_offset;
+       u32 attrmask[3];
        int err;
        struct nfsd4_compoundres *resp = rqstp->rq_resp;
        u32 minorversion = resp->cstate.minorversion;
@@ -3502,10 +3504,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
                .mnt    = exp->ex_path.mnt,
                .dentry = dentry,
        };
-       union {
-               u32             attrmask[3];
-               unsigned long   mask[2];
-       } u;
        unsigned long bit;
        bool file_modified = false;
        u64 size = 0;
@@ -3521,20 +3519,19 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
        /*
         * Make a local copy of the attribute bitmap that can be modified.
         */
-       memset(&u, 0, sizeof(u));
-       u.attrmask[0] = bmval[0];
-       u.attrmask[1] = bmval[1];
-       u.attrmask[2] = bmval[2];
+       attrmask[0] = bmval[0];
+       attrmask[1] = bmval[1];
+       attrmask[2] = bmval[2];
 
        args.rdattr_err = 0;
        if (exp->ex_fslocs.migrated) {
-               status = fattr_handle_absent_fs(&u.attrmask[0], &u.attrmask[1],
-                                               &u.attrmask[2], &args.rdattr_err);
+               status = fattr_handle_absent_fs(&attrmask[0], &attrmask[1],
+                                               &attrmask[2], &args.rdattr_err);
                if (status)
                        goto out;
        }
        args.size = 0;
-       if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
+       if (attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
                status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
                                        &file_modified, &size);
                if (status)
@@ -3553,16 +3550,16 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 
        if (!(args.stat.result_mask & STATX_BTIME))
                /* underlying FS does not offer btime so we can't share it */
-               u.attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
-       if ((u.attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+               attrmask[1] &= ~FATTR4_WORD1_TIME_CREATE;
+       if ((attrmask[0] & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
                        FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
-           (u.attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
+           (attrmask[1] & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
                       FATTR4_WORD1_SPACE_TOTAL))) {
                err = vfs_statfs(&path, &args.statfs);
                if (err)
                        goto out_nfserr;
        }
-       if ((u.attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
+       if ((attrmask[0] & (FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FSID)) &&
            !fhp) {
                tempfh = kmalloc(sizeof(struct svc_fh), GFP_KERNEL);
                status = nfserr_jukebox;
@@ -3577,10 +3574,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
                args.fhp = fhp;
 
        args.acl = NULL;
-       if (u.attrmask[0] & FATTR4_WORD0_ACL) {
+       if (attrmask[0] & FATTR4_WORD0_ACL) {
                err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl);
                if (err == -EOPNOTSUPP)
-                       u.attrmask[0] &= ~FATTR4_WORD0_ACL;
+                       attrmask[0] &= ~FATTR4_WORD0_ACL;
                else if (err == -EINVAL) {
                        status = nfserr_attrnotsupp;
                        goto out;
@@ -3592,17 +3589,17 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 
 #ifdef CONFIG_NFSD_V4_SECURITY_LABEL
        args.context = NULL;
-       if ((u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
-            u.attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
+       if ((attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) ||
+            attrmask[0] & FATTR4_WORD0_SUPPORTED_ATTRS) {
                if (exp->ex_flags & NFSEXP_SECURITY_LABEL)
                        err = security_inode_getsecctx(d_inode(dentry),
                                                &args.context, &args.contextlen);
                else
                        err = -EOPNOTSUPP;
                args.contextsupport = (err == 0);
-               if (u.attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
+               if (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL) {
                        if (err == -EOPNOTSUPP)
-                               u.attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
+                               attrmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL;
                        else if (err)
                                goto out_nfserr;
                }
@@ -3610,8 +3607,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 #endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
 
        /* attrmask */
-       status = nfsd4_encode_bitmap4(xdr, u.attrmask[0],
-                                     u.attrmask[1], u.attrmask[2]);
+       status = nfsd4_encode_bitmap4(xdr, attrmask[0], attrmask[1],
+                                     attrmask[2]);
        if (status)
                goto out;
 
@@ -3620,7 +3617,9 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
        attrlen_p = xdr_reserve_space(xdr, XDR_UNIT);
        if (!attrlen_p)
                goto out_resource;
-       for_each_set_bit(bit, (const unsigned long *)&u.mask,
+       bitmap_from_arr32(attr_bitmap, attrmask,
+                         ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops));
+       for_each_set_bit(bit, attr_bitmap,
                         ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)) {
                status = nfsd4_enc_fattr4_encode_ops[bit](xdr, &args);
                if (status != nfs_ok)
index bc846b904b68d43816c48c69c3ae83152cadabf1..aee40db7a036fb9f7d34e2e456fb6d61ae3bbf2d 100644 (file)
@@ -240,7 +240,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = {
 
 #define S_SHIFT 12
 static unsigned char
-nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
        [S_IFREG >> S_SHIFT]    = NILFS_FT_REG_FILE,
        [S_IFDIR >> S_SHIFT]    = NILFS_FT_DIR,
        [S_IFCHR >> S_SHIFT]    = NILFS_FT_CHRDEV,
index 13a9d7acf8f8ec151323d18d44a346e060bf0ce2..0ff2491c311d8a669c709fb94eb4a16a54515c68 100644 (file)
@@ -433,8 +433,8 @@ smb2_close_cached_fid(struct kref *ref)
        if (cfid->is_open) {
                rc = SMB2_close(0, cfid->tcon, cfid->fid.persistent_fid,
                           cfid->fid.volatile_fid);
-               if (rc != -EBUSY && rc != -EAGAIN)
-                       atomic_dec(&cfid->tcon->num_remote_opens);
+               if (rc) /* should we retry on -EBUSY or -EAGAIN? */
+                       cifs_dbg(VFS, "close cached dir rc %d\n", rc);
        }
 
        free_cached_dir(cfid);
index f6a302205f89c456d9fa3adb3dae238deeb97d10..d6669ce4ae87f07415b150eaffcbf429c4fe74bd 100644 (file)
@@ -1077,6 +1077,7 @@ struct cifs_ses {
                                   and after mount option parsing we fill it */
        char *domainName;
        char *password;
+       char *password2; /* When key rotation used, new password may be set before it expires */
        char workstation_name[CIFS_MAX_WORKSTATION_LEN];
        struct session_key auth_key;
        struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
index 85679ae106fd50a4e3289349e2916204ae3f94fc..4e35970681bf052dc343c23935549600f5ce8859 100644 (file)
@@ -2183,6 +2183,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
        }
 
        ++delim;
+       /* BB consider adding support for password2 (Key Rotation) for multiuser in future */
        ctx->password = kstrndup(delim, len, GFP_KERNEL);
        if (!ctx->password) {
                cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n",
@@ -2206,6 +2207,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
                        kfree(ctx->username);
                        ctx->username = NULL;
                        kfree_sensitive(ctx->password);
+                       /* no need to free ctx->password2 since not allocated in this path */
                        ctx->password = NULL;
                        goto out_key_put;
                }
@@ -2317,6 +2319,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
                if (!ses->password)
                        goto get_ses_fail;
        }
+       /* ctx->password2 freed at unmount */
+       if (ctx->password2) {
+               ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
+               if (!ses->password2)
+                       goto get_ses_fail;
+       }
        if (ctx->domainname) {
                ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL);
                if (!ses->domainName)
index b7bfe705b2c498b83a60131713246bb9d37abf98..6c727d8c31e870ddd0f809db12b21aae76ac80cd 100644 (file)
@@ -162,6 +162,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
        fsparam_string("username", Opt_user),
        fsparam_string("pass", Opt_pass),
        fsparam_string("password", Opt_pass),
+       fsparam_string("password2", Opt_pass2),
        fsparam_string("ip", Opt_ip),
        fsparam_string("addr", Opt_ip),
        fsparam_string("domain", Opt_domain),
@@ -345,6 +346,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
        new_ctx->nodename = NULL;
        new_ctx->username = NULL;
        new_ctx->password = NULL;
+       new_ctx->password2 = NULL;
        new_ctx->server_hostname = NULL;
        new_ctx->domainname = NULL;
        new_ctx->UNC = NULL;
@@ -357,6 +359,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx
        DUP_CTX_STR(prepath);
        DUP_CTX_STR(username);
        DUP_CTX_STR(password);
+       DUP_CTX_STR(password2);
        DUP_CTX_STR(server_hostname);
        DUP_CTX_STR(UNC);
        DUP_CTX_STR(source);
@@ -905,6 +908,8 @@ static int smb3_reconfigure(struct fs_context *fc)
        else  {
                kfree_sensitive(ses->password);
                ses->password = kstrdup(ctx->password, GFP_KERNEL);
+               kfree_sensitive(ses->password2);
+               ses->password2 = kstrdup(ctx->password2, GFP_KERNEL);
        }
        STEAL_STRING(cifs_sb, ctx, domainname);
        STEAL_STRING(cifs_sb, ctx, nodename);
@@ -1305,6 +1310,18 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
                        goto cifs_parse_mount_err;
                }
                break;
+       case Opt_pass2:
+               kfree_sensitive(ctx->password2);
+               ctx->password2 = NULL;
+               if (strlen(param->string) == 0)
+                       break;
+
+               ctx->password2 = kstrdup(param->string, GFP_KERNEL);
+               if (ctx->password2 == NULL) {
+                       cifs_errorf(fc, "OOM when copying password2 string\n");
+                       goto cifs_parse_mount_err;
+               }
+               break;
        case Opt_ip:
                if (strlen(param->string) == 0) {
                        ctx->got_ip = false;
@@ -1608,6 +1625,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
  cifs_parse_mount_err:
        kfree_sensitive(ctx->password);
        ctx->password = NULL;
+       kfree_sensitive(ctx->password2);
+       ctx->password2 = NULL;
        return -EINVAL;
 }
 
@@ -1713,6 +1732,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
        ctx->username = NULL;
        kfree_sensitive(ctx->password);
        ctx->password = NULL;
+       kfree_sensitive(ctx->password2);
+       ctx->password2 = NULL;
        kfree(ctx->server_hostname);
        ctx->server_hostname = NULL;
        kfree(ctx->UNC);
index 8a35645e0b65b244741da59177a2bcb0acea0256..a947bddeba273ea850b3502f07555a19316266a6 100644 (file)
@@ -145,6 +145,7 @@ enum cifs_param {
        Opt_source,
        Opt_user,
        Opt_pass,
+       Opt_pass2,
        Opt_ip,
        Opt_domain,
        Opt_srcaddr,
@@ -177,6 +178,7 @@ struct smb3_fs_context {
 
        char *username;
        char *password;
+       char *password2;
        char *domainname;
        char *source;
        char *server_hostname;
index 91b07ef9e25ca1c195bef21d06c92f5193633022..60afab5c83d410a9c5122d5f4826ade67cb93dee 100644 (file)
@@ -1105,7 +1105,8 @@ static int cifs_get_fattr(struct cifs_open_info_data *data,
                } else {
                        cifs_open_info_to_fattr(fattr, data, sb);
                }
-               if (!rc && fattr->cf_flags & CIFS_FATTR_DELETE_PENDING)
+               if (!rc && *inode &&
+                   (fattr->cf_flags & CIFS_FATTR_DELETE_PENDING))
                        cifs_mark_open_handles_for_deleted_file(*inode, full_path);
                break;
        case -EREMOTE:
index 33ac4f8f5050c416cd2004ee4516756edd3b11d8..7d15a1969b818439515b5188e8662a3b8f1276ce 100644 (file)
@@ -98,6 +98,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
        kfree(buf_to_free->serverDomain);
        kfree(buf_to_free->serverNOS);
        kfree_sensitive(buf_to_free->password);
+       kfree_sensitive(buf_to_free->password2);
        kfree(buf_to_free->user_name);
        kfree(buf_to_free->domainName);
        kfree_sensitive(buf_to_free->auth_key.response);
index b156eefa75d7cb4b13d1bf402234f08271a558ad..78c94d0350fe9970fab31564aeba6870d71859bd 100644 (file)
@@ -4964,68 +4964,84 @@ static int smb2_next_header(struct TCP_Server_Info *server, char *buf,
        return 0;
 }
 
-int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
-                      struct dentry *dentry, struct cifs_tcon *tcon,
-                      const char *full_path, umode_t mode, dev_t dev)
+static int __cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+                               struct dentry *dentry, struct cifs_tcon *tcon,
+                               const char *full_path, umode_t mode, dev_t dev)
 {
-       struct cifs_open_info_data buf = {};
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_open_parms oparms;
        struct cifs_io_parms io_parms = {};
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct cifs_fid fid;
        unsigned int bytes_written;
-       struct win_dev *pdev;
+       struct win_dev pdev = {};
        struct kvec iov[2];
        __u32 oplock = server->oplocks ? REQ_OPLOCK : 0;
        int rc;
 
-       if (!S_ISCHR(mode) && !S_ISBLK(mode) && !S_ISFIFO(mode))
+       switch (mode & S_IFMT) {
+       case S_IFCHR:
+               strscpy(pdev.type, "IntxCHR");
+               pdev.major = cpu_to_le64(MAJOR(dev));
+               pdev.minor = cpu_to_le64(MINOR(dev));
+               break;
+       case S_IFBLK:
+               strscpy(pdev.type, "IntxBLK");
+               pdev.major = cpu_to_le64(MAJOR(dev));
+               pdev.minor = cpu_to_le64(MINOR(dev));
+               break;
+       case S_IFIFO:
+               strscpy(pdev.type, "LnxFIFO");
+               break;
+       default:
                return -EPERM;
+       }
 
-       oparms = (struct cifs_open_parms) {
-               .tcon = tcon,
-               .cifs_sb = cifs_sb,
-               .desired_access = GENERIC_WRITE,
-               .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR |
-                                                     CREATE_OPTION_SPECIAL),
-               .disposition = FILE_CREATE,
-               .path = full_path,
-               .fid = &fid,
-       };
+       oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, GENERIC_WRITE,
+                            FILE_CREATE, CREATE_NOT_DIR |
+                            CREATE_OPTION_SPECIAL, ACL_NO_MODE);
+       oparms.fid = &fid;
 
-       rc = server->ops->open(xid, &oparms, &oplock, &buf);
+       rc = server->ops->open(xid, &oparms, &oplock, NULL);
        if (rc)
                return rc;
 
-       /*
-        * BB Do not bother to decode buf since no local inode yet to put
-        * timestamps in, but we can reuse it safely.
-        */
-       pdev = (struct win_dev *)&buf.fi;
        io_parms.pid = current->tgid;
        io_parms.tcon = tcon;
-       io_parms.length = sizeof(*pdev);
-       iov[1].iov_base = pdev;
-       iov[1].iov_len = sizeof(*pdev);
-       if (S_ISCHR(mode)) {
-               memcpy(pdev->type, "IntxCHR", 8);
-               pdev->major = cpu_to_le64(MAJOR(dev));
-               pdev->minor = cpu_to_le64(MINOR(dev));
-       } else if (S_ISBLK(mode)) {
-               memcpy(pdev->type, "IntxBLK", 8);
-               pdev->major = cpu_to_le64(MAJOR(dev));
-               pdev->minor = cpu_to_le64(MINOR(dev));
-       } else if (S_ISFIFO(mode)) {
-               memcpy(pdev->type, "LnxFIFO", 8);
-       }
+       io_parms.length = sizeof(pdev);
+       iov[1].iov_base = &pdev;
+       iov[1].iov_len = sizeof(pdev);
 
        rc = server->ops->sync_write(xid, &fid, &io_parms,
                                     &bytes_written, iov, 1);
        server->ops->close(xid, tcon, &fid);
-       d_drop(dentry);
-       /* FIXME: add code here to set EAs */
-       cifs_free_open_info(&buf);
+       return rc;
+}
+
+int cifs_sfu_make_node(unsigned int xid, struct inode *inode,
+                      struct dentry *dentry, struct cifs_tcon *tcon,
+                      const char *full_path, umode_t mode, dev_t dev)
+{
+       struct inode *new = NULL;
+       int rc;
+
+       rc = __cifs_sfu_make_node(xid, inode, dentry, tcon,
+                                 full_path, mode, dev);
+       if (rc)
+               return rc;
+
+       if (tcon->posix_extensions) {
+               rc = smb311_posix_get_inode_info(&new, full_path, NULL,
+                                                inode->i_sb, xid);
+       } else if (tcon->unix_ext) {
+               rc = cifs_get_inode_info_unix(&new, full_path,
+                                             inode->i_sb, xid);
+       } else {
+               rc = cifs_get_inode_info(&new, full_path, NULL,
+                                        inode->i_sb, xid, NULL);
+       }
+       if (!rc)
+               d_instantiate(dentry, new);
        return rc;
 }
 
index c0c4933af5fc386911922b4e23c7869bdea8098b..86c647a947ccd1065a8edb0712e113351839b96f 100644 (file)
@@ -367,6 +367,17 @@ again:
                }
 
                rc = cifs_setup_session(0, ses, server, nls_codepage);
+               if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) {
+                       /*
+                        * Try alternate password for next reconnect (key rotation
+                        * could be enabled on the server e.g.) if an alternate
+                        * password is available and the current password is expired,
+                        * but do not swap on non pwd related errors like host down
+                        */
+                       if (ses->password2)
+                               swap(ses->password2, ses->password);
+               }
+
                if ((rc == -EACCES) && !tcon->retry) {
                        mutex_unlock(&ses->session_mutex);
                        rc = -EHOSTDOWN;
index aa3411354e66d00c48db30adf73b34eedb84cb6d..16bd693d0b3aa23ce87af9cc1540e113a4c2a286 100644 (file)
@@ -48,6 +48,10 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
        gid_t i_gid;
        int err;
 
+       inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
+       if (inode->i_ino == 0)
+               return -EINVAL;
+
        err = squashfs_get_id(sb, le16_to_cpu(sqsh_ino->uid), &i_uid);
        if (err)
                return err;
@@ -58,7 +62,6 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
 
        i_uid_write(inode, i_uid);
        i_gid_write(inode, i_gid);
-       inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
        inode_set_mtime(inode, le32_to_cpu(sqsh_ino->mtime), 0);
        inode_set_atime(inode, inode_get_mtime_sec(inode), 0);
        inode_set_ctime(inode, inode_get_mtime_sec(inode), 0);
index dc067eeb638744b72be8487102498e9d010b2945..894c6ca1e5002015b378ad8d6ec598de44c25ebe 100644 (file)
@@ -336,6 +336,7 @@ static void update_inode_attr(struct dentry *dentry, struct inode *inode,
 
 /**
  * lookup_file - look up a file in the tracefs filesystem
+ * @parent_ei: Pointer to the eventfs_inode that represents parent of the file
  * @dentry: the dentry to look up
  * @mode: the permission that the file should have.
  * @attr: saved attributes changed by user
@@ -389,6 +390,7 @@ static struct dentry *lookup_file(struct eventfs_inode *parent_ei,
 /**
  * lookup_dir_entry - look up a dir in the tracefs filesystem
  * @dentry: the directory to look up
+ * @pei: Pointer to the parent eventfs_inode if available
  * @ei: the eventfs_inode that represents the directory to create
  *
  * This function will look up a dentry for a directory represented by
@@ -478,16 +480,20 @@ void eventfs_d_release(struct dentry *dentry)
 
 /**
  * lookup_file_dentry - create a dentry for a file of an eventfs_inode
+ * @dentry: The parent dentry under which the new file's dentry will be created
  * @ei: the eventfs_inode that the file will be created under
  * @idx: the index into the entry_attrs[] of the @ei
- * @parent: The parent dentry of the created file.
- * @name: The name of the file to create
  * @mode: The mode of the file.
  * @data: The data to use to set the inode of the file with on open()
  * @fops: The fops of the file to be created.
  *
- * Create a dentry for a file of an eventfs_inode @ei and place it into the
- * address located at @e_dentry.
+ * This function creates a dentry for a file associated with an
+ * eventfs_inode @ei. It uses the entry attributes specified by @idx,
+ * if available. The file will have the specified @mode and its inode will be
+ * set up with @data upon open. The file operations will be set to @fops.
+ *
+ * Return: Returns a pointer to the newly created file's dentry or an error
+ * pointer.
  */
 static struct dentry *
 lookup_file_dentry(struct dentry *dentry,
index c6a124e8d565febb690377ae982f60042ba2383b..964fa7f2400335dc8eb9456c3190aa36f2c0c8ec 100644 (file)
@@ -1048,7 +1048,7 @@ static int zonefs_init_zgroup(struct super_block *sb,
        zonefs_info(sb, "Zone group \"%s\" has %u file%s\n",
                    zonefs_zgroup_name(ztype),
                    zgroup->g_nr_zones,
-                   zgroup->g_nr_zones > 1 ? "s" : "");
+                   str_plural(zgroup->g_nr_zones));
 
        return 0;
 }
index 5de954e2b18aaac5c0796466d256f6cd10e1130d..e7796f373d0dac4daa5c322a7ba82983b9a8ac81 100644 (file)
@@ -911,17 +911,19 @@ static inline bool acpi_int_uid_match(struct acpi_device *adev, u64 uid2)
  * acpi_dev_hid_uid_match - Match device by supplied HID and UID
  * @adev: ACPI device to match.
  * @hid2: Hardware ID of the device.
- * @uid2: Unique ID of the device, pass 0 or NULL to not check _UID.
+ * @uid2: Unique ID of the device, pass NULL to not check _UID.
  *
  * Matches HID and UID in @adev with given @hid2 and @uid2. Absence of @uid2
  * will be treated as a match. If user wants to validate @uid2, it should be
  * done before calling this function.
  *
- * Returns: %true if matches or @uid2 is 0 or NULL, %false otherwise.
+ * Returns: %true if matches or @uid2 is NULL, %false otherwise.
  */
 #define acpi_dev_hid_uid_match(adev, hid2, uid2)                       \
        (acpi_dev_hid_match(adev, hid2) &&                              \
-               (!(uid2) || acpi_dev_uid_match(adev, uid2)))
+               /* Distinguish integer 0 from NULL @uid2 */             \
+               (_Generic(uid2, ACPI_STR_TYPES(!(uid2)), default: 0) || \
+               acpi_dev_uid_match(adev, uid2)))
 
 void acpi_dev_clear_dependencies(struct acpi_device *supplier);
 bool acpi_dev_ready_for_enumeration(const struct acpi_device *device);
index 6e794420bd398c7e4848cadebdc107116cfb6af2..b7de3a4eade1c265acc4f92b53d5617d1ae3cb87 100644 (file)
@@ -156,7 +156,10 @@ extern __printf(1, 2) void __warn_printk(const char *fmt, ...);
 
 #else /* !CONFIG_BUG */
 #ifndef HAVE_ARCH_BUG
-#define BUG() do {} while (1)
+#define BUG() do {             \
+       do {} while (1);        \
+       unreachable();          \
+} while (0)
 #endif
 
 #ifndef HAVE_ARCH_BUG_ON
index 87e3d49a4e29bf7af1de43d1da45bfd9ca3791ea..814207e7c37fcf17a65638f68ceda02e400c58fa 100644 (file)
@@ -512,13 +512,9 @@ struct hv_proximity_domain_flags {
        u32 proximity_info_valid : 1;
 } __packed;
 
-/* Not a union in windows but useful for zeroing */
-union hv_proximity_domain_info {
-       struct {
-               u32 domain_id;
-               struct hv_proximity_domain_flags flags;
-       };
-       u64 as_uint64;
+struct hv_proximity_domain_info {
+       u32 domain_id;
+       struct hv_proximity_domain_flags flags;
 } __packed;
 
 struct hv_lp_startup_status {
@@ -532,14 +528,13 @@ struct hv_lp_startup_status {
 } __packed;
 
 /* HvAddLogicalProcessor hypercall */
-struct hv_add_logical_processor_in {
+struct hv_input_add_logical_processor {
        u32 lp_index;
        u32 apic_id;
-       union hv_proximity_domain_info proximity_domain_info;
-       u64 flags;
+       struct hv_proximity_domain_info proximity_domain_info;
 } __packed;
 
-struct hv_add_logical_processor_out {
+struct hv_output_add_logical_processor {
        struct hv_lp_startup_status startup_status;
 } __packed;
 
@@ -560,7 +555,7 @@ struct hv_create_vp {
        u8 padding[3];
        u8 subnode_type;
        u64 subnode_id;
-       union hv_proximity_domain_info proximity_domain_info;
+       struct hv_proximity_domain_info proximity_domain_info;
        u64 flags;
 } __packed;
 
index 99935779682dc29180f556469c4487603b082740..8fe7aaab25990aa2fdebd81463b9ac9dedd36945 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/types.h>
 #include <linux/atomic.h>
 #include <linux/bitops.h>
+#include <acpi/acpi_numa.h>
 #include <linux/cpumask.h>
 #include <linux/nmi.h>
 #include <asm/ptrace.h>
@@ -67,6 +68,19 @@ extern u64 hv_do_fast_hypercall8(u16 control, u64 input8);
 bool hv_isolation_type_snp(void);
 bool hv_isolation_type_tdx(void);
 
+static inline struct hv_proximity_domain_info hv_numa_node_to_pxm_info(int node)
+{
+       struct hv_proximity_domain_info pxm_info = {};
+
+       if (node != NUMA_NO_NODE) {
+               pxm_info.domain_id = node_to_pxm(node);
+               pxm_info.flags.proximity_info_valid = 1;
+               pxm_info.flags.proximity_preferred = 1;
+       }
+
+       return pxm_info;
+}
+
 /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */
 static inline int hv_result(u64 status)
 {
index c00cc6c0878a1e173701a6267ac062fa3d41b790..8c252e073bd8103c8b13f39d90a965370b16d37d 100644 (file)
@@ -268,7 +268,7 @@ static inline void *offset_to_ptr(const int *off)
  *   - When one operand is a null pointer constant (i.e. when x is an integer
  *     constant expression) and the other is an object pointer (i.e. our
  *     third operand), the conditional operator returns the type of the
- *     object pointer operand (i.e. "int *). Here, within the sizeof(), we
+ *     object pointer operand (i.e. "int *"). Here, within the sizeof(), we
  *     would then get:
  *       sizeof(*((int *)(...))  == sizeof(int)  == 4
  *   - When one operand is a void pointer (i.e. when x is not an integer
index e06bad467f55ef1befdad569f0a8a37875def383..c3f9bb6602ba2135cae645bda4d730cd703a12a6 100644 (file)
@@ -682,4 +682,11 @@ static inline bool dma_fence_is_container(struct dma_fence *fence)
        return dma_fence_is_array(fence) || dma_fence_is_chain(fence);
 }
 
+#define DMA_FENCE_WARN(f, fmt, args...) \
+       do {                                                            \
+               struct dma_fence *__ff = (f);                           \
+               pr_warn("f %llu#%llu: " fmt, __ff->context, __ff->seqno,\
+                        ##args);                                       \
+       } while (0)
+
 #endif /* __LINUX_DMA_FENCE_H */
index 868c8fb1bbc1c2dabd708bc2c6485c2e42dee8fe..13becafe41df00f94dddb5e4f0417d3447c6456c 100644 (file)
@@ -2,6 +2,8 @@
 #ifndef __LINUX_GFP_TYPES_H
 #define __LINUX_GFP_TYPES_H
 
+#include <linux/bits.h>
+
 /* The typedef is in types.h but we want the documentation here */
 #if 0
 /**
index 6c75c8bd44a0bb627020ba267c3d0debb2379ba4..1a14e239221f7e9aec06b510d450c9f30b34fc2c 100644 (file)
@@ -2,7 +2,6 @@
 #ifndef __LINUX_GPIO_PROPERTY_H
 #define __LINUX_GPIO_PROPERTY_H
 
-#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */
 #include <linux/property.h>
 
 #define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \
index 6ef0557b4bff8ed5d14bc18391d356913136c23c..96ceb4095425eb39aa8145fda63cc1d859fb56f5 100644 (file)
@@ -832,6 +832,7 @@ struct vmbus_gpadl {
        u32 gpadl_handle;
        u32 size;
        void *buffer;
+       bool decrypted;
 };
 
 struct vmbus_channel {
index 05df0e399d7c0b84236198f57e0c61e90412beaa..ac333ea81d319526d5fde59bf9f64b5510f94e41 100644 (file)
@@ -13,7 +13,7 @@ enum {
         * A hint to not wake right away but delay until there are enough of
         * tw's queued to match the number of CQEs the task is waiting for.
         *
-        * Must not be used wirh requests generating more than one CQE.
+        * Must not be used with requests generating more than one CQE.
         * It's also ignored unless IORING_SETUP_DEFER_TASKRUN is set.
         */
        IOU_F_TWQ_LAZY_WAKE                     = 1,
index 147feebd508cabfa98a1844fb75e2235fb9b56d6..3f003d5fde5341bd789d0d1286109563624090d3 100644 (file)
@@ -114,7 +114,7 @@ do {                                                \
 # define lockdep_softirq_enter()               do { } while (0)
 # define lockdep_softirq_exit()                        do { } while (0)
 # define lockdep_hrtimer_enter(__hrtimer)      false
-# define lockdep_hrtimer_exit(__context)       do { } while (0)
+# define lockdep_hrtimer_exit(__context)       do { (void)(__context); } while (0)
 # define lockdep_posixtimer_enter()            do { } while (0)
 # define lockdep_posixtimer_exit()             do { } while (0)
 # define lockdep_irq_work_enter(__work)                do { } while (0)
index 0436b919f1c7fc535b30400bf95affc7e4534186..7b0ee64225de9cefebaf63e0d759c082684e3ab5 100644 (file)
@@ -2207,11 +2207,6 @@ static inline int arch_make_folio_accessible(struct folio *folio)
  */
 #include <linux/vmstat.h>
 
-static __always_inline void *lowmem_page_address(const struct page *page)
-{
-       return page_to_virt(page);
-}
-
 #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
 #define HASHED_PAGE_VIRTUAL
 #endif
@@ -2234,6 +2229,11 @@ void set_page_address(struct page *page, void *virtual);
 void page_address_init(void);
 #endif
 
+static __always_inline void *lowmem_page_address(const struct page *page)
+{
+       return page_to_virt(page);
+}
+
 #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
 #define page_address(page) lowmem_page_address(page)
 #define set_page_address(page, address)  do { } while(0)
index 5d868505a94e43fe4f6124915e90dc814c269278..6d92b68efbf6c3afe86b9f6c22a8759ce51e7a28 100644 (file)
@@ -80,7 +80,7 @@ DECLARE_PER_CPU(u32, kstack_offset);
        if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT, \
                                &randomize_kstack_offset)) {            \
                u32 offset = raw_cpu_read(kstack_offset);               \
-               offset ^= (rand);                                       \
+               offset = ror32(offset, 5) ^ (rand);                     \
                raw_cpu_write(kstack_offset, offset);                   \
        }                                                               \
 } while (0)
index 29c4e4f243e47d580945626da4a172e2ecff0c5b..f2394a409c9d5e478844b0d6a43011f4447798a0 100644 (file)
@@ -31,9 +31,9 @@ static __always_inline bool rw_base_is_locked(const struct rwbase_rt *rwb)
        return atomic_read(&rwb->readers) != READER_BIAS;
 }
 
-static inline void rw_base_assert_held_write(const struct rwbase_rt *rwb)
+static __always_inline bool rw_base_is_write_locked(const struct rwbase_rt *rwb)
 {
-       WARN_ON(atomic_read(&rwb->readers) != WRITER_BIAS);
+       return atomic_read(&rwb->readers) == WRITER_BIAS;
 }
 
 static __always_inline bool rw_base_is_contended(const struct rwbase_rt *rwb)
index 4f1c18992f768fe67faffa139f259e8213a93f9e..c8b543d428b0a8d4662183f3342e88ec61d10189 100644 (file)
@@ -167,14 +167,14 @@ static __always_inline int rwsem_is_locked(const struct rw_semaphore *sem)
        return rw_base_is_locked(&sem->rwbase);
 }
 
-static inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_nolockdep(const struct rw_semaphore *sem)
 {
        WARN_ON(!rwsem_is_locked(sem));
 }
 
-static inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
+static __always_inline void rwsem_assert_held_write_nolockdep(const struct rw_semaphore *sem)
 {
-       rw_base_assert_held_write(sem);
+       WARN_ON(!rw_base_is_write_locked(&sem->rwbase));
 }
 
 static __always_inline int rwsem_is_contended(struct rw_semaphore *sem)
index a4c15db2f5e5401373e66a1b1f66ffd5e97ae35e..3fb18f7eb73eafecf8101a6e73a141cc4d46a0f9 100644 (file)
@@ -110,8 +110,17 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping,
 extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end);
 int shmem_unuse(unsigned int type);
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
                          struct mm_struct *mm, unsigned long vm_flags);
+#else
+static __always_inline bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
+                                         struct mm_struct *mm, unsigned long vm_flags)
+{
+       return false;
+}
+#endif
+
 #ifdef CONFIG_SHMEM
 extern unsigned long shmem_swap_usage(struct vm_area_struct *vma);
 #else
index 307961b41541a620023ad40d3178d47b94768126..317200cd3a603e213228e1b5b561b9757f3f3940 100644 (file)
@@ -50,11 +50,36 @@ static inline int copy_from_sockptr_offset(void *dst, sockptr_t src,
        return 0;
 }
 
+/* Deprecated: copies @size bytes from @src (offset 0) without checking them
+ * against the user-supplied optlen, so it is unsafe unless the caller has
+ * already validated optlen. Prefer copy_safe_from_sockptr() instead.
+ */
 static inline int copy_from_sockptr(void *dst, sockptr_t src, size_t size)
 {
        return copy_from_sockptr_offset(dst, src, 0, size);
 }
 
+/**
+ * copy_safe_from_sockptr() - length-checked copy of a struct from sockptr
+ * @dst:   Destination address, in kernel space. This buffer must be @ksize
+ *         bytes long.
+ * @ksize: Size of @dst struct.
+ * @optval: Source address (in user or kernel space).
+ * @optlen: Size of @optval data.
+ *
+ * Returns:
+ *  * -EINVAL: @optlen < @ksize
+ *  * -EFAULT: access to userspace failed.
+ *  * 0 : @ksize bytes were copied
+ */
+static inline int copy_safe_from_sockptr(void *dst, size_t ksize,
+                                        sockptr_t optval, unsigned int optlen)
+{
+       if (optlen < ksize)
+               return -EINVAL;
+       return copy_from_sockptr(dst, optval, ksize);
+}
+
 static inline int copy_struct_from_sockptr(void *dst, size_t ksize,
                sockptr_t src, size_t usize)
 {
index 48b700ba1d188a798209d4de4693173bfc6b98af..a5c560a2f8c25867e8e3e53588d551ea3b356feb 100644 (file)
@@ -390,6 +390,35 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
 }
 #endif /* CONFIG_MIGRATION */
 
+#ifdef CONFIG_MEMORY_FAILURE
+
+/*
+ * Support for hardware poisoned pages
+ */
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+       BUG_ON(!PageLocked(page));
+       return swp_entry(SWP_HWPOISON, page_to_pfn(page));
+}
+
+static inline int is_hwpoison_entry(swp_entry_t entry)
+{
+       return swp_type(entry) == SWP_HWPOISON;
+}
+
+#else
+
+static inline swp_entry_t make_hwpoison_entry(struct page *page)
+{
+       return swp_entry(0, 0);
+}
+
+static inline int is_hwpoison_entry(swp_entry_t swp)
+{
+       return 0;
+}
+#endif
+
 typedef unsigned long pte_marker;
 
 #define  PTE_MARKER_UFFD_WP                    BIT(0)
@@ -483,8 +512,9 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
 
 /*
  * A pfn swap entry is a special type of swap entry that always has a pfn stored
- * in the swap offset. They are used to represent unaddressable device memory
- * and to restrict access to a page undergoing migration.
+ * in the swap offset. They can be used to represent unaddressable device
+ * memory, to restrict access to a page undergoing migration, or to represent
+ * a pfn which has been hwpoisoned and unmapped.
  */
 static inline bool is_pfn_swap_entry(swp_entry_t entry)
 {
@@ -492,7 +522,7 @@ static inline bool is_pfn_swap_entry(swp_entry_t entry)
        BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
 
        return is_migration_entry(entry) || is_device_private_entry(entry) ||
-              is_device_exclusive_entry(entry);
+              is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
 }
 
 struct page_vma_mapped_walk;
@@ -561,35 +591,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
 }
 #endif  /* CONFIG_ARCH_ENABLE_THP_MIGRATION */
 
-#ifdef CONFIG_MEMORY_FAILURE
-
-/*
- * Support for hardware poisoned pages
- */
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
-       BUG_ON(!PageLocked(page));
-       return swp_entry(SWP_HWPOISON, page_to_pfn(page));
-}
-
-static inline int is_hwpoison_entry(swp_entry_t entry)
-{
-       return swp_type(entry) == SWP_HWPOISON;
-}
-
-#else
-
-static inline swp_entry_t make_hwpoison_entry(struct page *page)
-{
-       return swp_entry(0, 0);
-}
-
-static inline int is_hwpoison_entry(swp_entry_t swp)
-{
-       return 0;
-}
-#endif
-
 static inline int non_swap_entry(swp_entry_t entry)
 {
        return swp_type(entry) >= MAX_SWAPFILES;
index ffe48e69b3f3ae136c97df9193e066f302ae40e4..457879938fc198b7104cdd12f7290c439b44e3e2 100644 (file)
@@ -135,10 +135,11 @@ static inline void u64_stats_inc(u64_stats_t *p)
        p->v++;
 }
 
-static inline void u64_stats_init(struct u64_stats_sync *syncp)
-{
-       seqcount_init(&syncp->seq);
-}
+#define u64_stats_init(syncp)                          \
+       do {                                            \
+               struct u64_stats_sync *__s = (syncp);   \
+               seqcount_init(&__s->seq);               \
+       } while (0)
 
 static inline void __u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
index 17539d08966618bfe4c2f0b41f4893c8951b22fe..e398e1dbd2d365c54874a6f368adacf6734e9f03 100644 (file)
@@ -108,7 +108,7 @@ struct udp_sock {
 #define udp_assign_bit(nr, sk, val)            \
        assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
 
-#define UDP_MAX_SEGMENTS       (1 << 6UL)
+#define UDP_MAX_SEGMENTS       (1 << 7UL)
 
 #define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
 
index b0201747a263a9526c5d60c2c2644a8e064a8439..26c4325aa3734eaa32ffbbdd862b151b93e7fdf8 100644 (file)
@@ -170,7 +170,7 @@ size_t virtio_max_dma_size(const struct virtio_device *vdev);
 
 /**
  * struct virtio_driver - operations for a virtio I/O driver
- * @driver: underlying device driver (populate name and owner).
+ * @driver: underlying device driver (populate name).
  * @id_table: the ids serviced by this driver.
  * @feature_table: an array of feature numbers supported by this driver.
  * @feature_table_size: number of entries in the feature table array.
@@ -208,7 +208,10 @@ static inline struct virtio_driver *drv_to_virtio(struct device_driver *drv)
        return container_of(drv, struct virtio_driver, driver);
 }
 
-int register_virtio_driver(struct virtio_driver *drv);
+/* use a macro to avoid include chaining to get THIS_MODULE */
+#define register_virtio_driver(drv) \
+       __register_virtio_driver(drv, THIS_MODULE)
+int __register_virtio_driver(struct virtio_driver *drv, struct module *owner);
 void unregister_virtio_driver(struct virtio_driver *drv);
 
 /* module_virtio_driver() - Helper macro for drivers that don't do
index 9d06eb945509ecfcf01bec1ffa8481262931c5bd..62a407db1bf5ff6dd3298077e05e636ccc2ff97b 100644 (file)
@@ -438,6 +438,10 @@ static inline void in6_ifa_hold(struct inet6_ifaddr *ifp)
        refcount_inc(&ifp->refcnt);
 }
 
+static inline bool in6_ifa_hold_safe(struct inet6_ifaddr *ifp)
+{
+       return refcount_inc_not_zero(&ifp->refcnt);
+}
 
 /*
  *     compute link-local solicited-node multicast address
index 9fe95a22abeb7e2fb12d7384a974e5689db61211..eaec5d6caa29d293902f86666c91cceebd6f388c 100644 (file)
@@ -585,6 +585,15 @@ static inline struct sk_buff *bt_skb_sendmmsg(struct sock *sk,
        return skb;
 }
 
+static inline int bt_copy_from_sockptr(void *dst, size_t dst_size,
+                                      sockptr_t src, size_t src_size)
+{
+       if (dst_size > src_size)
+               return -EINVAL;
+
+       return copy_from_sockptr(dst, src, dst_size);
+}
+
 int bt_to_errno(u16 code);
 __u8 bt_status(int err);
 
index 5cd64bb2104df389250fb3c518ba00a3826c53f7..c286cc2e766ee04a77206b7c326b4283de43933e 100644 (file)
@@ -361,6 +361,39 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb)
        return pskb_network_may_pull(skb, nhlen);
 }
 
+/* Variant of pskb_inet_may_pull().
+ */
+static inline bool skb_vlan_inet_prepare(struct sk_buff *skb)
+{
+       int nhlen = 0, maclen = ETH_HLEN;
+       __be16 type = skb->protocol;
+
+       /* Essentially this is skb_protocol(skb, true)
+        * And we get MAC len.
+        */
+       if (eth_type_vlan(type))
+               type = __vlan_get_protocol(skb, type, &maclen);
+
+       switch (type) {
+#if IS_ENABLED(CONFIG_IPV6)
+       case htons(ETH_P_IPV6):
+               nhlen = sizeof(struct ipv6hdr);
+               break;
+#endif
+       case htons(ETH_P_IP):
+               nhlen = sizeof(struct iphdr);
+               break;
+       }
+       /* For ETH_P_IPV6/ETH_P_IP we make sure to pull
+        * a base network header in skb->head.
+        */
+       if (!pskb_may_pull(skb, maclen + nhlen))
+               return false;
+
+       skb_set_network_header(skb, maclen);
+       return true;
+}
+
 static inline int ip_encap_hlen(struct ip_tunnel_encap *e)
 {
        const struct ip_tunnel_encap_ops *ops;
index a763dd327c6ea95d6b94fda1ea2efd8f1784335f..9abb7ee40d72fc2e7d2ef0ec86ef18df939ddd9c 100644 (file)
@@ -336,7 +336,7 @@ int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
 int nf_flow_table_offload_init(void);
 void nf_flow_table_offload_exit(void);
 
-static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
+static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
 {
        __be16 proto;
 
@@ -352,6 +352,16 @@ static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
        return 0;
 }
 
+static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
+{
+       if (!pskb_may_pull(skb, PPPOE_SES_HLEN))
+               return false;
+
+       *inner_proto = __nf_flow_pppoe_proto(skb);
+
+       return true;
+}
+
 #define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
 #define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
 #define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count)      \
index e27c28b612e464ca41c9f07e213d48bf84f11bf6..3f1ed467f951f6342d9ee8da6b576cf8c787af2d 100644 (file)
@@ -307,9 +307,23 @@ static inline void *nft_elem_priv_cast(const struct nft_elem_priv *priv)
        return (void *)priv;
 }
 
+
+/**
+ * enum nft_iter_type - nftables set iterator type
+ * @NFT_ITER_UNSPEC: unspecified iterator type (the zero value)
+ * @NFT_ITER_READ: read-only iteration over set elements
+ * @NFT_ITER_UPDATE: iteration under mutex to update set element state
+ */
+enum nft_iter_type {
+       NFT_ITER_UNSPEC,
+       NFT_ITER_READ,
+       NFT_ITER_UPDATE,
+};
+
 struct nft_set;
 struct nft_set_iter {
        u8              genmask;
+       enum nft_iter_type type:8;
        unsigned int    count;
        unsigned int    skip;
        int             err;
index cefe0c4bdae34c91868c22731a3b666f8e16e996..41ca14e81d55f926dc4002e820d7e027f4729021 100644 (file)
@@ -117,6 +117,7 @@ struct Qdisc {
        struct qdisc_skb_head   q;
        struct gnet_stats_basic_sync bstats;
        struct gnet_stats_queue qstats;
+       int                     owner;
        unsigned long           state;
        unsigned long           state2; /* must be written under qdisc spinlock */
        struct Qdisc            *next_sched;
index ba2d96a1bc2f94703945c5f79294a66af1fe8fd4..f50fcafc69de20b8b20a53a45f29b23f4259a65e 100644 (file)
@@ -609,7 +609,7 @@ TRACE_EVENT(rpcgss_context,
                __field(unsigned int, timeout)
                __field(u32, window_size)
                __field(int, len)
-               __string(acceptor, data)
+               __string_len(acceptor, data, len)
        ),
 
        TP_fast_assign(
@@ -618,7 +618,7 @@ TRACE_EVENT(rpcgss_context,
                __entry->timeout = timeout;
                __entry->window_size = window_size;
                __entry->len = len;
-               strncpy(__get_str(acceptor), data, len);
+               __assign_str(acceptor, data);
        ),
 
        TP_printk("win_size=%u expiry=%lu now=%lu timeout=%u acceptor=%.*s",
index bea6973906134656d84299958258205932c23e04..b95dd84eef2db2311f985921064e65d96e3e2f4c 100644 (file)
 /* Get the config size */
 #define VHOST_VDPA_GET_CONFIG_SIZE     _IOR(VHOST_VIRTIO, 0x79, __u32)
 
-/* Get the count of all virtqueues */
-#define VHOST_VDPA_GET_VQS_COUNT       _IOR(VHOST_VIRTIO, 0x80, __u32)
-
-/* Get the number of virtqueue groups. */
-#define VHOST_VDPA_GET_GROUP_NUM       _IOR(VHOST_VIRTIO, 0x81, __u32)
-
 /* Get the number of address spaces. */
 #define VHOST_VDPA_GET_AS_NUM          _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
 
 #define VHOST_VDPA_GET_VRING_DESC_GROUP        _IOWR(VHOST_VIRTIO, 0x7F,       \
                                              struct vhost_vring_state)
 
+
+/* Get the count of all virtqueues */
+#define VHOST_VDPA_GET_VQS_COUNT       _IOR(VHOST_VIRTIO, 0x80, __u32)
+
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM       _IOR(VHOST_VIRTIO, 0x81, __u32)
+
 /* Get the queue size of a specific virtqueue.
  * userspace set the vring index in vhost_vring_state.index
  * kernel set the queue size in vhost_vring_state.num
  */
-#define VHOST_VDPA_GET_VRING_SIZE      _IOWR(VHOST_VIRTIO, 0x80,       \
+#define VHOST_VDPA_GET_VRING_SIZE      _IOWR(VHOST_VIRTIO, 0x82,       \
                                              struct vhost_vring_state)
 #endif
index 4521c2b66b98db3c3affc55c7aeb4a69b8eec0a7..c170a2b8d2cf21f06d1c5af8bf57edecb94aaa95 100644 (file)
@@ -2602,19 +2602,6 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
        if (__io_cqring_events_user(ctx) >= min_events)
                return 0;
 
-       if (sig) {
-#ifdef CONFIG_COMPAT
-               if (in_compat_syscall())
-                       ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
-                                                     sigsz);
-               else
-#endif
-                       ret = set_user_sigmask(sig, sigsz);
-
-               if (ret)
-                       return ret;
-       }
-
        init_waitqueue_func_entry(&iowq.wq, io_wake_function);
        iowq.wq.private = current;
        INIT_LIST_HEAD(&iowq.wq.entry);
@@ -2633,6 +2620,19 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                io_napi_adjust_timeout(ctx, &iowq, &ts);
        }
 
+       if (sig) {
+#ifdef CONFIG_COMPAT
+               if (in_compat_syscall())
+                       ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig,
+                                                     sigsz);
+               else
+#endif
+                       ret = set_user_sigmask(sig, sigsz);
+
+               if (ret)
+                       return ret;
+       }
+
        io_napi_busy_loop(ctx, &iowq);
 
        trace_io_uring_cqring_wait(ctx, min_events);
index 1e7665ff6ef70264b26206f99c34aa5516190129..4afb475d41974b95a86a22bd84771d8c29781c08 100644 (file)
@@ -1276,6 +1276,7 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
 
        if (req_has_async_data(req)) {
                kmsg = req->async_data;
+               kmsg->msg.msg_control_user = sr->msg_control;
        } else {
                ret = io_sendmsg_copy_hdr(req, &iomsg);
                if (ret)
index 8f6affd051f77564f96ca4682a58d0c131f62c56..07ad53b7f11952080e890ed91f99f3e762bf984d 100644 (file)
@@ -3207,7 +3207,8 @@ enum cpu_mitigations {
 };
 
 static enum cpu_mitigations cpu_mitigations __ro_after_init =
-       CPU_MITIGATIONS_AUTO;
+       IS_ENABLED(CONFIG_SPECULATION_MITIGATIONS) ? CPU_MITIGATIONS_AUTO :
+                                                    CPU_MITIGATIONS_OFF;
 
 static int __init mitigations_parse_cmdline(char *arg)
 {
index 86fe172b5958232ee29d481bf2f9fe60a51c5881..a5e0dfc44d24e22641e72bb0362511a33b23a1fd 100644 (file)
  * @alloc_size:        Size of the allocated buffer.
  * @list:      The free list describing the number of free entries available
  *             from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ *             allocated non-padding slot.
  */
 struct io_tlb_slot {
        phys_addr_t orig_addr;
        size_t alloc_size;
-       unsigned int list;
+       unsigned short list;
+       unsigned short pad_slots;
 };
 
 static bool swiotlb_force_bounce;
@@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
                                         mem->nslabs - i);
                mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
                mem->slots[i].alloc_size = 0;
+               mem->slots[i].pad_slots = 0;
        }
 
        memset(vaddr, 0, bytes);
@@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev)
 #endif
 }
 
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
+/**
+ * swiotlb_align_offset() - Get required offset into an IO TLB allocation.
+ * @dev:         Owning device.
+ * @align_mask:  Allocation alignment mask.
+ * @addr:        DMA address.
+ *
+ * Return the minimum offset from the start of an IO TLB allocation which is
+ * required for a given buffer address and allocation alignment to keep the
+ * device happy.
+ *
+ * First, the address bits covered by min_align_mask must be identical in the
+ * original address and the bounce buffer address. High bits are preserved by
+ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra
+ * padding bytes before the bounce buffer.
+ *
+ * Second, @align_mask specifies which bits of the first allocated slot must
+ * be zero. This may require allocating additional padding slots, and then the
+ * offset (in bytes) from the first such padding slot is returned.
  */
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+static unsigned int swiotlb_align_offset(struct device *dev,
+                                        unsigned int align_mask, u64 addr)
 {
-       return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+       return addr & dma_get_min_align_mask(dev) &
+               (align_mask | (IO_TLB_SIZE - 1));
 }
 
 /*
@@ -841,27 +863,23 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
        size_t alloc_size = mem->slots[index].alloc_size;
        unsigned long pfn = PFN_DOWN(orig_addr);
        unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
-       unsigned int tlb_offset, orig_addr_offset;
+       int tlb_offset;
 
        if (orig_addr == INVALID_PHYS_ADDR)
                return;
 
-       tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
-       orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
-       if (tlb_offset < orig_addr_offset) {
-               dev_WARN_ONCE(dev, 1,
-                       "Access before mapping start detected. orig offset %u, requested offset %u.\n",
-                       orig_addr_offset, tlb_offset);
-               return;
-       }
-
-       tlb_offset -= orig_addr_offset;
-       if (tlb_offset > alloc_size) {
-               dev_WARN_ONCE(dev, 1,
-                       "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n",
-                       alloc_size, size, tlb_offset);
-               return;
-       }
+       /*
+        * It's valid for tlb_offset to be negative. This can happen when the
+        * "offset" returned by swiotlb_align_offset() is non-zero, and the
+        * tlb_addr is pointing within the first "offset" bytes of the second
+        * or subsequent slots of the allocated swiotlb area. While it's not
+        * valid for tlb_addr to be pointing within the first "offset" bytes
+        * of the first slot, there's no way to check for such an error since
+        * this function can't distinguish the first slot from the second and
+        * subsequent slots.
+        */
+       tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+                    swiotlb_align_offset(dev, 0, orig_addr);
 
        orig_addr += tlb_offset;
        alloc_size -= tlb_offset;
@@ -1005,7 +1023,7 @@ static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool
        unsigned long max_slots = get_max_slots(boundary_mask);
        unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
        unsigned int nslots = nr_slots(alloc_size), stride;
-       unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+       unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr);
        unsigned int index, slots_checked, count = 0, i;
        unsigned long flags;
        unsigned int slot_base;
@@ -1328,11 +1346,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
                unsigned long attrs)
 {
        struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
-       unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+       unsigned int offset;
        struct io_tlb_pool *pool;
        unsigned int i;
        int index;
        phys_addr_t tlb_addr;
+       unsigned short pad_slots;
 
        if (!mem || !mem->nslabs) {
                dev_warn_ratelimited(dev,
@@ -1349,6 +1368,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
                return (phys_addr_t)DMA_MAPPING_ERROR;
        }
 
+       offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
        index = swiotlb_find_slots(dev, orig_addr,
                                   alloc_size + offset, alloc_align_mask, &pool);
        if (index == -1) {
@@ -1364,6 +1384,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
         * This is needed when we sync the memory.  Then we sync the buffer if
         * needed.
         */
+       pad_slots = offset >> IO_TLB_SHIFT;
+       offset &= (IO_TLB_SIZE - 1);
+       index += pad_slots;
+       pool->slots[index].pad_slots = pad_slots;
        for (i = 0; i < nr_slots(alloc_size + offset); i++)
                pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
        tlb_addr = slot_addr(pool->start, index) + offset;
@@ -1384,13 +1408,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
 {
        struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
        unsigned long flags;
-       unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
-       int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
-       int nslots = nr_slots(mem->slots[index].alloc_size + offset);
-       int aindex = index / mem->area_nslabs;
-       struct io_tlb_area *area = &mem->areas[aindex];
+       unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
+       int index, nslots, aindex;
+       struct io_tlb_area *area;
        int count, i;
 
+       index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+       index -= mem->slots[index].pad_slots;
+       nslots = nr_slots(mem->slots[index].alloc_size + offset);
+       aindex = index / mem->area_nslabs;
+       area = &mem->areas[aindex];
+
        /*
         * Return the buffer to the free list by setting the corresponding
         * entries to indicate the number of contiguous entries available.
@@ -1413,6 +1441,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
                mem->slots[i].list = ++count;
                mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
                mem->slots[i].alloc_size = 0;
+               mem->slots[i].pad_slots = 0;
        }
 
        /*
@@ -1647,9 +1676,6 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
 static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
                                         const char *dirname)
 {
-       atomic_long_set(&mem->total_used, 0);
-       atomic_long_set(&mem->used_hiwater, 0);
-
        mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
        if (!mem->nslabs)
                return;
@@ -1660,7 +1686,6 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
        debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
                        &fops_io_tlb_hiwater);
 #ifdef CONFIG_SWIOTLB_DYNAMIC
-       atomic_long_set(&mem->transient_nslabs, 0);
        debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs,
                            mem, &fops_io_tlb_transient_used);
 #endif
index 39a5046c2f0bf49e1bcade15c4c3c5574742b09d..aebb3e6c96dc62e5818ce16e31779868ea669067 100644 (file)
@@ -714,6 +714,23 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                } else if (anon_vma_fork(tmp, mpnt))
                        goto fail_nomem_anon_vma_fork;
                vm_flags_clear(tmp, VM_LOCKED_MASK);
+               /*
+                * Copy/update hugetlb private vma information.
+                */
+               if (is_vm_hugetlb_page(tmp))
+                       hugetlb_dup_vma_private(tmp);
+
+               /*
+                * Link the vma into the MT. After using __mt_dup(), memory
+                * allocation is not necessary here, so it cannot fail.
+                */
+               vma_iter_bulk_store(&vmi, tmp);
+
+               mm->map_count++;
+
+               if (tmp->vm_ops && tmp->vm_ops->open)
+                       tmp->vm_ops->open(tmp);
+
                file = tmp->vm_file;
                if (file) {
                        struct address_space *mapping = file->f_mapping;
@@ -730,25 +747,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
                        i_mmap_unlock_write(mapping);
                }
 
-               /*
-                * Copy/update hugetlb private vma information.
-                */
-               if (is_vm_hugetlb_page(tmp))
-                       hugetlb_dup_vma_private(tmp);
-
-               /*
-                * Link the vma into the MT. After using __mt_dup(), memory
-                * allocation is not necessary here, so it cannot fail.
-                */
-               vma_iter_bulk_store(&vmi, tmp);
-
-               mm->map_count++;
                if (!(tmp->vm_flags & VM_WIPEONFORK))
                        retval = copy_page_range(tmp, mpnt);
 
-               if (tmp->vm_ops && tmp->vm_ops->open)
-                       tmp->vm_ops->open(tmp);
-
                if (retval) {
                        mpnt = vma_next(&vmi);
                        goto loop_out;
index 9d9095e817928658d2c6d54d5da6f4826ff7c6be..65adc815fc6e63027e1b7f0b23c597475a3fea1e 100644 (file)
@@ -1567,10 +1567,17 @@ static int check_kprobe_address_safe(struct kprobe *p,
        jump_label_lock();
        preempt_disable();
 
-       /* Ensure it is not in reserved area nor out of text */
-       if (!(core_kernel_text((unsigned long) p->addr) ||
-           is_module_text_address((unsigned long) p->addr)) ||
-           in_gate_area_no_mm((unsigned long) p->addr) ||
+       /* Ensure the address is in a text area, and find its module, if any. */
+       *probed_mod = NULL;
+       if (!core_kernel_text((unsigned long) p->addr)) {
+               *probed_mod = __module_text_address((unsigned long) p->addr);
+               if (!(*probed_mod)) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+       }
+       /* Ensure it is not in reserved area. */
+       if (in_gate_area_no_mm((unsigned long) p->addr) ||
            within_kprobe_blacklist((unsigned long) p->addr) ||
            jump_label_text_reserved(p->addr, p->addr) ||
            static_call_text_reserved(p->addr, p->addr) ||
@@ -1580,8 +1587,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
                goto out;
        }
 
-       /* Check if 'p' is probing a module. */
-       *probed_mod = __module_text_address((unsigned long) p->addr);
+       /* Get module refcount and reject __init functions for loaded modules. */
        if (*probed_mod) {
                /*
                 * We must hold a refcount of the probed module while updating
index e3ae93bbcb9b50d487727bee16f63f67d66442ac..09f8397bae15fb9c895d060d7708c9bca5ef62f7 100644 (file)
@@ -106,6 +106,12 @@ static void s2idle_enter(void)
        swait_event_exclusive(s2idle_wait_head,
                    s2idle_state == S2IDLE_STATE_WAKE);
 
+       /*
+        * Kick all CPUs to ensure that they resume their timers and restore
+        * consistent system state.
+        */
+       wake_up_all_idle_cpus();
+
        cpus_read_unlock();
 
        raw_spin_lock_irq(&s2idle_lock);
index fb0fdec8719a13ed5fd5eb66d13027e184dce5de..d88b13076b7944e54fefb2802d914f8f7fe1abf5 100644 (file)
@@ -7,6 +7,7 @@
  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
  */
+#include <linux/compiler.h>
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/hrtimer.h>
@@ -84,7 +85,7 @@ int tick_is_oneshot_available(void)
  */
 static void tick_periodic(int cpu)
 {
-       if (tick_do_timer_cpu == cpu) {
+       if (READ_ONCE(tick_do_timer_cpu) == cpu) {
                raw_spin_lock(&jiffies_lock);
                write_seqcount_begin(&jiffies_seq);
 
@@ -215,8 +216,8 @@ static void tick_setup_device(struct tick_device *td,
                 * If no cpu took the do_timer update, assign it to
                 * this cpu:
                 */
-               if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
-                       tick_do_timer_cpu = cpu;
+               if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
+                       WRITE_ONCE(tick_do_timer_cpu, cpu);
                        tick_next_period = ktime_get();
 #ifdef CONFIG_NO_HZ_FULL
                        /*
@@ -232,7 +233,7 @@ static void tick_setup_device(struct tick_device *td,
                                                !tick_nohz_full_cpu(cpu)) {
                        tick_take_do_timer_from_boot();
                        tick_do_timer_boot_cpu = -1;
-                       WARN_ON(tick_do_timer_cpu != cpu);
+                       WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
 #endif
                }
 
@@ -406,10 +407,10 @@ void tick_assert_timekeeping_handover(void)
 int tick_cpu_dying(unsigned int dying_cpu)
 {
        /*
-        * If the current CPU is the timekeeper, it's the only one that
-        * can safely hand over its duty. Also all online CPUs are in
-        * stop machine, guaranteed not to be idle, therefore it's safe
-        * to pick any online successor.
+        * If the current CPU is the timekeeper, it's the only one that can
+        * safely hand over its duty. Also all online CPUs are in stop
+        * machine, guaranteed not to be idle, therefore there is no
+        * concurrency and it's safe to pick any online successor.
         */
        if (tick_do_timer_cpu == dying_cpu)
                tick_do_timer_cpu = cpumask_first(cpu_online_mask);
index 1331216a9cae749cce5e13b7ff4adcf9ba5fefaa..71a792cd893620eebe73eb1a0fc0c4ff5d454344 100644 (file)
@@ -8,6 +8,7 @@
  *
  *  Started by: Thomas Gleixner and Ingo Molnar
  */
+#include <linux/compiler.h>
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/hrtimer.h>
@@ -204,7 +205,7 @@ static inline void tick_sched_flag_clear(struct tick_sched *ts,
 
 static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
 {
-       int cpu = smp_processor_id();
+       int tick_cpu, cpu = smp_processor_id();
 
        /*
         * Check if the do_timer duty was dropped. We don't care about
@@ -216,16 +217,18 @@ static void tick_sched_do_timer(struct tick_sched *ts, ktime_t now)
         * If nohz_full is enabled, this should not happen because the
         * 'tick_do_timer_cpu' CPU never relinquishes.
         */
-       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) &&
-           unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+       tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
+       if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && unlikely(tick_cpu == TICK_DO_TIMER_NONE)) {
 #ifdef CONFIG_NO_HZ_FULL
                WARN_ON_ONCE(tick_nohz_full_running);
 #endif
-               tick_do_timer_cpu = cpu;
+               WRITE_ONCE(tick_do_timer_cpu, cpu);
+               tick_cpu = cpu;
        }
 
        /* Check if jiffies need an update */
-       if (tick_do_timer_cpu == cpu)
+       if (tick_cpu == cpu)
                tick_do_update_jiffies64(now);
 
        /*
@@ -610,7 +613,7 @@ bool tick_nohz_cpu_hotpluggable(unsigned int cpu)
         * timers, workqueues, timekeeping, ...) on behalf of full dynticks
         * CPUs. It must remain online when nohz full is enabled.
         */
-       if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
+       if (tick_nohz_full_running && READ_ONCE(tick_do_timer_cpu) == cpu)
                return false;
        return true;
 }
@@ -891,6 +894,7 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 {
        u64 basemono, next_tick, delta, expires;
        unsigned long basejiff;
+       int tick_cpu;
 
        basemono = get_jiffies_update(&basejiff);
        ts->last_jiffies = basejiff;
@@ -947,9 +951,9 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
         * Otherwise we can sleep as long as we want.
         */
        delta = timekeeping_max_deferment();
-       if (cpu != tick_do_timer_cpu &&
-           (tick_do_timer_cpu != TICK_DO_TIMER_NONE ||
-            !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
+       tick_cpu = READ_ONCE(tick_do_timer_cpu);
+       if (tick_cpu != cpu &&
+           (tick_cpu != TICK_DO_TIMER_NONE || !tick_sched_flag_test(ts, TS_FLAG_DO_TIMER_LAST)))
                delta = KTIME_MAX;
 
        /* Calculate the next expiry time */
@@ -970,6 +974,7 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
        unsigned long basejiff = ts->last_jiffies;
        u64 basemono = ts->timer_expires_base;
        bool timer_idle = tick_sched_flag_test(ts, TS_FLAG_STOPPED);
+       int tick_cpu;
        u64 expires;
 
        /* Make sure we won't be trying to stop it twice in a row. */
@@ -1007,10 +1012,11 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
         * do_timer() never gets invoked. Keep track of the fact that it
         * was the one which had the do_timer() duty last.
         */
-       if (cpu == tick_do_timer_cpu) {
-               tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+       tick_cpu = READ_ONCE(tick_do_timer_cpu);
+       if (tick_cpu == cpu) {
+               WRITE_ONCE(tick_do_timer_cpu, TICK_DO_TIMER_NONE);
                tick_sched_flag_set(ts, TS_FLAG_DO_TIMER_LAST);
-       } else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+       } else if (tick_cpu != TICK_DO_TIMER_NONE) {
                tick_sched_flag_clear(ts, TS_FLAG_DO_TIMER_LAST);
        }
 
@@ -1173,15 +1179,17 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
                return false;
 
        if (tick_nohz_full_enabled()) {
+               int tick_cpu = READ_ONCE(tick_do_timer_cpu);
+
                /*
                 * Keep the tick alive to guarantee timekeeping progression
                 * if there are full dynticks CPUs around
                 */
-               if (tick_do_timer_cpu == cpu)
+               if (tick_cpu == cpu)
                        return false;
 
                /* Should not happen for nohz-full */
-               if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
+               if (WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE))
                        return false;
        }
 
index 61c541c36596d9cdb532d876b56a273f44731928..47345bf1d4a9f7e850db213999c62ecb02f8fea0 100644 (file)
@@ -965,7 +965,7 @@ config FTRACE_RECORD_RECURSION
 
 config FTRACE_RECORD_RECURSION_SIZE
        int "Max number of recursed functions to record"
-       default 128
+       default 128
        depends on FTRACE_RECORD_RECURSION
        help
          This defines the limit of number of functions that can be
index 25476ead681b8411f41d713a77603cdf0653b4ad..6511dc3a00da841bc79554973636056b51c600ff 100644 (file)
@@ -1393,7 +1393,6 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
        old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
        old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
 
-       local_inc(&cpu_buffer->pages_touched);
        /*
         * Just make sure we have seen our old_write and synchronize
         * with any interrupts that come in.
@@ -1430,8 +1429,9 @@ static void rb_tail_page_update(struct ring_buffer_per_cpu *cpu_buffer,
                 */
                local_set(&next_page->page->commit, 0);
 
-               /* Again, either we update tail_page or an interrupt does */
-               (void)cmpxchg(&cpu_buffer->tail_page, tail_page, next_page);
+               /* Either we update tail_page or an interrupt does */
+               if (try_cmpxchg(&cpu_buffer->tail_page, &tail_page, next_page))
+                       local_inc(&cpu_buffer->pages_touched);
        }
 }
 
index 7c364b87352eed92e0f76137091882231f187028..52f75c36bbca4922bec786815bb70ff409f62a61 100644 (file)
@@ -1670,6 +1670,7 @@ static int trace_format_open(struct inode *inode, struct file *file)
        return 0;
 }
 
+#ifdef CONFIG_PERF_EVENTS
 static ssize_t
 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 {
@@ -1684,6 +1685,7 @@ event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
 
        return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
 }
+#endif
 
 static ssize_t
 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
@@ -2152,10 +2154,12 @@ static const struct file_operations ftrace_event_format_fops = {
        .release = seq_release,
 };
 
+#ifdef CONFIG_PERF_EVENTS
 static const struct file_operations ftrace_event_id_fops = {
        .read = event_id_read,
        .llseek = default_llseek,
 };
+#endif
 
 static const struct file_operations ftrace_event_filter_fops = {
        .open = tracing_open_file_tr,
index bf70850035c76f468c7c0af023454bf5bc6716e3..404dba36bae380eeadfd881a8b807dc7eab0037f 100644 (file)
@@ -594,13 +594,15 @@ static void test_ip_fast_csum(struct kunit *test)
 
 static void test_csum_ipv6_magic(struct kunit *test)
 {
-#if defined(CONFIG_NET)
        const struct in6_addr *saddr;
        const struct in6_addr *daddr;
        unsigned int len;
        unsigned char proto;
        __wsum csum;
 
+       if (!IS_ENABLED(CONFIG_NET))
+               return;
+
        const int daddr_offset = sizeof(struct in6_addr);
        const int len_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr);
        const int proto_offset = sizeof(struct in6_addr) + sizeof(struct in6_addr) +
@@ -618,7 +620,6 @@ static void test_csum_ipv6_magic(struct kunit *test)
                CHECK_EQ(to_sum16(expected_csum_ipv6_magic[i]),
                         csum_ipv6_magic(saddr, daddr, len, proto, csum));
        }
-#endif /* !CONFIG_NET */
 }
 
 static struct kunit_case __refdata checksum_test_cases[] = {
index 276c12140ee26dac37137e400f4c303d34045bb1..c288df9372ede1cbda1371ae92e20a25ae9ebe91 100644 (file)
@@ -134,7 +134,7 @@ static const test_ubsan_fp test_ubsan_array[] = {
 };
 
 /* Excluded because they Oops the module. */
-static const test_ubsan_fp skip_ubsan_array[] = {
+static __used const test_ubsan_fp skip_ubsan_array[] = {
        test_ubsan_divrem_overflow,
 };
 
index af8edadc05d1b87200050bd34ff3d58ec52abd52..1611e73b1121b1b9031356ccac6b765c60327ba1 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1206,6 +1206,22 @@ static long __get_user_pages(struct mm_struct *mm,
 
                /* first iteration or cross vma bound */
                if (!vma || start >= vma->vm_end) {
+                       /*
+                        * MADV_POPULATE_(READ|WRITE) wants to handle VMA
+                        * lookups+error reporting differently.
+                        */
+                       if (gup_flags & FOLL_MADV_POPULATE) {
+                               vma = vma_lookup(mm, start);
+                               if (!vma) {
+                                       ret = -ENOMEM;
+                                       goto out;
+                               }
+                               if (check_vma_flags(vma, gup_flags)) {
+                                       ret = -EINVAL;
+                                       goto out;
+                               }
+                               goto retry;
+                       }
                        vma = gup_vma_lookup(mm, start);
                        if (!vma && in_gate_area(mm, start)) {
                                ret = get_gate_page(mm, start & PAGE_MASK,
@@ -1685,35 +1701,35 @@ long populate_vma_page_range(struct vm_area_struct *vma,
 }
 
 /*
- * faultin_vma_page_range() - populate (prefault) page tables inside the
- *                           given VMA range readable/writable
+ * faultin_page_range() - populate (prefault) page tables inside the
+ *                       given range readable/writable
  *
  * This takes care of mlocking the pages, too, if VM_LOCKED is set.
  *
- * @vma: target vma
+ * @mm: the mm to populate page tables in
  * @start: start address
  * @end: end address
  * @write: whether to prefault readable or writable
  * @locked: whether the mmap_lock is still held
  *
- * Returns either number of processed pages in the vma, or a negative error
- * code on error (see __get_user_pages()).
+ * Returns either number of processed pages in the MM, or a negative error
+ * code on error (see __get_user_pages()). Note that this function reports
+ * errors related to VMAs, such as incompatible mappings, as expected by
+ * MADV_POPULATE_(READ|WRITE).
  *
- * vma->vm_mm->mmap_lock must be held. The range must be page-aligned and
- * covered by the VMA. If it's released, *@locked will be set to 0.
+ * The range must be page-aligned.
+ *
+ * mm->mmap_lock must be held. If it's released, *@locked will be set to 0.
  */
-long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
-                           unsigned long end, bool write, int *locked)
+long faultin_page_range(struct mm_struct *mm, unsigned long start,
+                       unsigned long end, bool write, int *locked)
 {
-       struct mm_struct *mm = vma->vm_mm;
        unsigned long nr_pages = (end - start) / PAGE_SIZE;
        int gup_flags;
        long ret;
 
        VM_BUG_ON(!PAGE_ALIGNED(start));
        VM_BUG_ON(!PAGE_ALIGNED(end));
-       VM_BUG_ON_VMA(start < vma->vm_start, vma);
-       VM_BUG_ON_VMA(end > vma->vm_end, vma);
        mmap_assert_locked(mm);
 
        /*
@@ -1725,19 +1741,13 @@ long faultin_vma_page_range(struct vm_area_struct *vma, unsigned long start,
         *                a poisoned page.
         * !FOLL_FORCE: Require proper access permissions.
         */
-       gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE;
+       gup_flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_UNLOCKABLE |
+                   FOLL_MADV_POPULATE;
        if (write)
                gup_flags |= FOLL_WRITE;
 
-       /*
-        * We want to report -EINVAL instead of -EFAULT for any permission
-        * problems or incompatible mappings.
-        */
-       if (check_vma_flags(vma, gup_flags))
-               return -EINVAL;
-
-       ret = __get_user_pages(mm, start, nr_pages, gup_flags,
-                              NULL, locked);
+       ret = __get_user_pages_locked(mm, start, nr_pages, NULL, locked,
+                                     gup_flags);
        lru_add_drain();
        return ret;
 }
index 9859aa4f755380a88013c70791e6e6df90ce861f..89f58c7603b255feb3dceaccfee603a503c40e49 100644 (file)
@@ -2259,9 +2259,6 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
                        goto unlock_ptls;
                }
 
-               folio_move_anon_rmap(src_folio, dst_vma);
-               WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
                src_pmdval = pmdp_huge_clear_flush(src_vma, src_addr, src_pmd);
                /* Folio got pinned from under us. Put it back and fail the move. */
                if (folio_maybe_dma_pinned(src_folio)) {
@@ -2270,6 +2267,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm
                        goto unlock_ptls;
                }
 
+               folio_move_anon_rmap(src_folio, dst_vma);
+               WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+
                _dst_pmd = mk_huge_pmd(&src_folio->page, dst_vma->vm_page_prot);
                /* Follow mremap() behavior and treat the entry dirty after the move */
                _dst_pmd = pmd_mkwrite(pmd_mkdirty(_dst_pmd), dst_vma);
index 23ef240ba48a60a77102f7bf1beb2e76a987486d..31d00eee028f1179b99405f0fdd3461b16e17d2e 100644 (file)
@@ -7044,9 +7044,13 @@ long hugetlb_change_protection(struct vm_area_struct *vma,
                        if (!pte_same(pte, newpte))
                                set_huge_pte_at(mm, address, ptep, newpte, psize);
                } else if (unlikely(is_pte_marker(pte))) {
-                       /* No other markers apply for now. */
-                       WARN_ON_ONCE(!pte_marker_uffd_wp(pte));
-                       if (uffd_wp_resolve)
+                       /*
+                        * Do nothing on a poison marker; page is
+                        * corrupted, permissions do not apply.  Here
+                        * pte_marker_uffd_wp()==true implies !poison
+                        * because they're mutually exclusive.
+                        */
+                       if (pte_marker_uffd_wp(pte) && uffd_wp_resolve)
                                /* Safe to modify directly (non-present->none). */
                                huge_pte_clear(mm, address, ptep, psize);
                } else if (!huge_pte_none(pte)) {
index 7e486f2c502cee245991e2468a0655228a81aef5..07ad2675a88b4798b140d3af3a303a20519cddc3 100644 (file)
@@ -686,9 +686,8 @@ struct anon_vma *folio_anon_vma(struct folio *folio);
 void unmap_mapping_folio(struct folio *folio);
 extern long populate_vma_page_range(struct vm_area_struct *vma,
                unsigned long start, unsigned long end, int *locked);
-extern long faultin_vma_page_range(struct vm_area_struct *vma,
-                                  unsigned long start, unsigned long end,
-                                  bool write, int *locked);
+extern long faultin_page_range(struct mm_struct *mm, unsigned long start,
+               unsigned long end, bool write, int *locked);
 extern bool mlock_future_ok(struct mm_struct *mm, unsigned long flags,
                               unsigned long bytes);
 
@@ -1127,10 +1126,13 @@ enum {
        FOLL_FAST_ONLY = 1 << 20,
        /* allow unlocking the mmap lock */
        FOLL_UNLOCKABLE = 1 << 21,
+       /* VMA lookup+checks compatible with MADV_POPULATE_(READ|WRITE) */
+       FOLL_MADV_POPULATE = 1 << 22,
 };
 
 #define INTERNAL_GUP_FLAGS (FOLL_TOUCH | FOLL_TRIED | FOLL_REMOTE | FOLL_PIN | \
-                           FOLL_FAST_ONLY | FOLL_UNLOCKABLE)
+                           FOLL_FAST_ONLY | FOLL_UNLOCKABLE | \
+                           FOLL_MADV_POPULATE)
 
 /*
  * Indicates for which pages that are write-protected in the page table,
index 44a498c94158c882c624eac2e29a5f07d854e322..1a073fcc4c0c021496667619a20f6ee1afaef7c0 100644 (file)
@@ -908,27 +908,14 @@ static long madvise_populate(struct vm_area_struct *vma,
 {
        const bool write = behavior == MADV_POPULATE_WRITE;
        struct mm_struct *mm = vma->vm_mm;
-       unsigned long tmp_end;
        int locked = 1;
        long pages;
 
        *prev = vma;
 
        while (start < end) {
-               /*
-                * We might have temporarily dropped the lock. For example,
-                * our VMA might have been split.
-                */
-               if (!vma || start >= vma->vm_end) {
-                       vma = vma_lookup(mm, start);
-                       if (!vma)
-                               return -ENOMEM;
-               }
-
-               tmp_end = min_t(unsigned long, end, vma->vm_end);
                /* Populate (prefault) page tables readable/writable. */
-               pages = faultin_vma_page_range(vma, start, tmp_end, write,
-                                              &locked);
+               pages = faultin_page_range(mm, start, end, write, &locked);
                if (!locked) {
                        mmap_read_lock(mm);
                        locked = 1;
@@ -949,7 +936,7 @@ static long madvise_populate(struct vm_area_struct *vma,
                                pr_warn_once("%s: unhandled return value: %ld\n",
                                             __func__, pages);
                                fallthrough;
-                       case -ENOMEM:
+                       case -ENOMEM: /* No VMA or out of memory. */
                                return -ENOMEM;
                        }
                }
index 9349948f1abfd120977706bbda23456999f057bc..9e62a00b46ddee5899f85cfc252dabd7c0d04121 100644 (file)
@@ -154,11 +154,23 @@ static int __page_handle_poison(struct page *page)
 {
        int ret;
 
-       zone_pcp_disable(page_zone(page));
+       /*
+        * zone_pcp_disable() can't be used here. It will
+        * hold pcp_batch_high_lock and dissolve_free_huge_page() might hold
+        * cpu_hotplug_lock via static_key_slow_dec() when hugetlb vmemmap
+        * optimization is enabled. This will break current lock dependency
+        * chain and lead to deadlock.
+        * Disabling pcp before dissolving the page was a deterministic
+        * approach because we made sure that those pages cannot end up in any
+        * PCP list. Draining PCP lists expels those pages to the buddy system,
+        * but nothing guarantees that those pages do not get back to a PCP
+        * queue if we need to refill those.
+        */
        ret = dissolve_free_huge_page(page);
-       if (!ret)
+       if (!ret) {
+               drain_all_pages(page_zone(page));
                ret = take_page_off_buddy(page);
-       zone_pcp_enable(page_zone(page));
+       }
 
        return ret;
 }
index d17d1351ec84af6ae4f49dc34d4c27c56bf77468..742f432e5bf06f560abdc675d658401f76df5237 100644 (file)
@@ -118,7 +118,6 @@ static __init void init_page_owner(void)
        register_dummy_stack();
        register_failure_stack();
        register_early_stack();
-       static_branch_enable(&page_owner_inited);
        init_early_allocated_pages();
        /* Initialize dummy and failure stacks and link them to stack_list */
        dummy_stack.stack_record = __stack_depot_get_stack_record(dummy_handle);
@@ -129,6 +128,7 @@ static __init void init_page_owner(void)
                refcount_set(&failure_stack.stack_record->count, 1);
        dummy_stack.next = &failure_stack;
        stack_list = &dummy_stack;
+       static_branch_enable(&page_owner_inited);
 }
 
 struct page_ext_operations page_owner_ops = {
@@ -196,7 +196,8 @@ static void add_stack_record_to_list(struct stack_record *stack_record,
        spin_unlock_irqrestore(&stack_list_lock, flags);
 }
 
-static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
+static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask,
+                                  int nr_base_pages)
 {
        struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
 
@@ -217,20 +218,74 @@ static void inc_stack_record_count(depot_stack_handle_t handle, gfp_t gfp_mask)
                        /* Add the new stack_record to our list */
                        add_stack_record_to_list(stack_record, gfp_mask);
        }
-       refcount_inc(&stack_record->count);
+       refcount_add(nr_base_pages, &stack_record->count);
 }
 
-static void dec_stack_record_count(depot_stack_handle_t handle)
+static void dec_stack_record_count(depot_stack_handle_t handle,
+                                  int nr_base_pages)
 {
        struct stack_record *stack_record = __stack_depot_get_stack_record(handle);
 
-       if (stack_record)
-               refcount_dec(&stack_record->count);
+       if (!stack_record)
+               return;
+
+       if (refcount_sub_and_test(nr_base_pages, &stack_record->count))
+               pr_warn("%s: refcount went to 0 for %u handle\n", __func__,
+                       handle);
 }
 
-void __reset_page_owner(struct page *page, unsigned short order)
+static inline void __update_page_owner_handle(struct page_ext *page_ext,
+                                             depot_stack_handle_t handle,
+                                             unsigned short order,
+                                             gfp_t gfp_mask,
+                                             short last_migrate_reason, u64 ts_nsec,
+                                             pid_t pid, pid_t tgid, char *comm)
 {
        int i;
+       struct page_owner *page_owner;
+
+       for (i = 0; i < (1 << order); i++) {
+               page_owner = get_page_owner(page_ext);
+               page_owner->handle = handle;
+               page_owner->order = order;
+               page_owner->gfp_mask = gfp_mask;
+               page_owner->last_migrate_reason = last_migrate_reason;
+               page_owner->pid = pid;
+               page_owner->tgid = tgid;
+               page_owner->ts_nsec = ts_nsec;
+               strscpy(page_owner->comm, comm,
+                       sizeof(page_owner->comm));
+               __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
+               __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+               page_ext = page_ext_next(page_ext);
+       }
+}
+
+static inline void __update_page_owner_free_handle(struct page_ext *page_ext,
+                                                  depot_stack_handle_t handle,
+                                                  unsigned short order,
+                                                  pid_t pid, pid_t tgid,
+                                                  u64 free_ts_nsec)
+{
+       int i;
+       struct page_owner *page_owner;
+
+       for (i = 0; i < (1 << order); i++) {
+               page_owner = get_page_owner(page_ext);
+               /* Only __reset_page_owner() wants to clear the bit */
+               if (handle) {
+                       __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
+                       page_owner->free_handle = handle;
+               }
+               page_owner->free_ts_nsec = free_ts_nsec;
+               page_owner->free_pid = current->pid;
+               page_owner->free_tgid = current->tgid;
+               page_ext = page_ext_next(page_ext);
+       }
+}
+
+void __reset_page_owner(struct page *page, unsigned short order)
+{
        struct page_ext *page_ext;
        depot_stack_handle_t handle;
        depot_stack_handle_t alloc_handle;
@@ -245,16 +300,10 @@ void __reset_page_owner(struct page *page, unsigned short order)
        alloc_handle = page_owner->handle;
 
        handle = save_stack(GFP_NOWAIT | __GFP_NOWARN);
-       for (i = 0; i < (1 << order); i++) {
-               __clear_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
-               page_owner->free_handle = handle;
-               page_owner->free_ts_nsec = free_ts_nsec;
-               page_owner->free_pid = current->pid;
-               page_owner->free_tgid = current->tgid;
-               page_ext = page_ext_next(page_ext);
-               page_owner = get_page_owner(page_ext);
-       }
+       __update_page_owner_free_handle(page_ext, handle, order, current->pid,
+                                       current->tgid, free_ts_nsec);
        page_ext_put(page_ext);
+
        if (alloc_handle != early_handle)
                /*
                 * early_handle is being set as a handle for all those
@@ -263,39 +312,14 @@ void __reset_page_owner(struct page *page, unsigned short order)
                 * the machinery is not ready yet, we cannot decrement
                 * their refcount either.
                 */
-               dec_stack_record_count(alloc_handle);
-}
-
-static inline void __set_page_owner_handle(struct page_ext *page_ext,
-                                       depot_stack_handle_t handle,
-                                       unsigned short order, gfp_t gfp_mask)
-{
-       struct page_owner *page_owner;
-       int i;
-       u64 ts_nsec = local_clock();
-
-       for (i = 0; i < (1 << order); i++) {
-               page_owner = get_page_owner(page_ext);
-               page_owner->handle = handle;
-               page_owner->order = order;
-               page_owner->gfp_mask = gfp_mask;
-               page_owner->last_migrate_reason = -1;
-               page_owner->pid = current->pid;
-               page_owner->tgid = current->tgid;
-               page_owner->ts_nsec = ts_nsec;
-               strscpy(page_owner->comm, current->comm,
-                       sizeof(page_owner->comm));
-               __set_bit(PAGE_EXT_OWNER, &page_ext->flags);
-               __set_bit(PAGE_EXT_OWNER_ALLOCATED, &page_ext->flags);
-
-               page_ext = page_ext_next(page_ext);
-       }
+               dec_stack_record_count(alloc_handle, 1 << order);
 }
 
 noinline void __set_page_owner(struct page *page, unsigned short order,
                                        gfp_t gfp_mask)
 {
        struct page_ext *page_ext;
+       u64 ts_nsec = local_clock();
        depot_stack_handle_t handle;
 
        handle = save_stack(gfp_mask);
@@ -303,9 +327,11 @@ noinline void __set_page_owner(struct page *page, unsigned short order,
        page_ext = page_ext_get(page);
        if (unlikely(!page_ext))
                return;
-       __set_page_owner_handle(page_ext, handle, order, gfp_mask);
+       __update_page_owner_handle(page_ext, handle, order, gfp_mask, -1,
+                                  current->pid, current->tgid, ts_nsec,
+                                  current->comm);
        page_ext_put(page_ext);
-       inc_stack_record_count(handle, gfp_mask);
+       inc_stack_record_count(handle, gfp_mask, 1 << order);
 }
 
 void __set_page_owner_migrate_reason(struct page *page, int reason)
@@ -340,9 +366,12 @@ void __split_page_owner(struct page *page, int old_order, int new_order)
 
 void __folio_copy_owner(struct folio *newfolio, struct folio *old)
 {
+       int i;
        struct page_ext *old_ext;
        struct page_ext *new_ext;
-       struct page_owner *old_page_owner, *new_page_owner;
+       struct page_owner *old_page_owner;
+       struct page_owner *new_page_owner;
+       depot_stack_handle_t migrate_handle;
 
        old_ext = page_ext_get(&old->page);
        if (unlikely(!old_ext))
@@ -356,30 +385,32 @@ void __folio_copy_owner(struct folio *newfolio, struct folio *old)
 
        old_page_owner = get_page_owner(old_ext);
        new_page_owner = get_page_owner(new_ext);
-       new_page_owner->order = old_page_owner->order;
-       new_page_owner->gfp_mask = old_page_owner->gfp_mask;
-       new_page_owner->last_migrate_reason =
-               old_page_owner->last_migrate_reason;
-       new_page_owner->handle = old_page_owner->handle;
-       new_page_owner->pid = old_page_owner->pid;
-       new_page_owner->tgid = old_page_owner->tgid;
-       new_page_owner->free_pid = old_page_owner->free_pid;
-       new_page_owner->free_tgid = old_page_owner->free_tgid;
-       new_page_owner->ts_nsec = old_page_owner->ts_nsec;
-       new_page_owner->free_ts_nsec = old_page_owner->ts_nsec;
-       strcpy(new_page_owner->comm, old_page_owner->comm);
-
+       migrate_handle = new_page_owner->handle;
+       __update_page_owner_handle(new_ext, old_page_owner->handle,
+                                  old_page_owner->order, old_page_owner->gfp_mask,
+                                  old_page_owner->last_migrate_reason,
+                                  old_page_owner->ts_nsec, old_page_owner->pid,
+                                  old_page_owner->tgid, old_page_owner->comm);
+       /*
+        * Do not proactively clear PAGE_EXT_OWNER{_ALLOCATED} bits as the folio
+        * will be freed after migration. Keep them until then as they may be
+        * useful.
+        */
+       __update_page_owner_free_handle(new_ext, 0, old_page_owner->order,
+                                       old_page_owner->free_pid,
+                                       old_page_owner->free_tgid,
+                                       old_page_owner->free_ts_nsec);
        /*
-        * We don't clear the bit on the old folio as it's going to be freed
-        * after migration. Until then, the info can be useful in case of
-        * a bug, and the overall stats will be off a bit only temporarily.
-        * Also, migrate_misplaced_transhuge_page() can still fail the
-        * migration and then we want the old folio to retain the info. But
-        * in that case we also don't need to explicitly clear the info from
-        * the new page, which will be freed.
+        * We linked the original stack to the new folio, we need to do the same
+        * for the new one and the old folio otherwise there will be an imbalance
+        * when subtracting those pages from the stack.
         */
-       __set_bit(PAGE_EXT_OWNER, &new_ext->flags);
-       __set_bit(PAGE_EXT_OWNER_ALLOCATED, &new_ext->flags);
+       for (i = 0; i < (1 << new_page_owner->order); i++) {
+               old_page_owner->handle = migrate_handle;
+               old_ext = page_ext_next(old_ext);
+               old_page_owner = get_page_owner(old_ext);
+       }
+
        page_ext_put(new_ext);
        page_ext_put(old_ext);
 }
@@ -787,8 +818,9 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
                                goto ext_put_continue;
 
                        /* Found early allocated page */
-                       __set_page_owner_handle(page_ext, early_handle,
-                                               0, 0);
+                       __update_page_owner_handle(page_ext, early_handle, 0, 0,
+                                                  -1, local_clock(), current->pid,
+                                                  current->tgid, current->comm);
                        count++;
 ext_put_continue:
                        page_ext_put(page_ext);
@@ -840,13 +872,11 @@ static void *stack_start(struct seq_file *m, loff_t *ppos)
                 * value of stack_list.
                 */
                stack = smp_load_acquire(&stack_list);
+               m->private = stack;
        } else {
                stack = m->private;
-               stack = stack->next;
        }
 
-       m->private = stack;
-
        return stack;
 }
 
@@ -861,11 +891,11 @@ static void *stack_next(struct seq_file *m, void *v, loff_t *ppos)
        return stack;
 }
 
-static unsigned long page_owner_stack_threshold;
+static unsigned long page_owner_pages_threshold;
 
 static int stack_print(struct seq_file *m, void *v)
 {
-       int i, stack_count;
+       int i, nr_base_pages;
        struct stack *stack = v;
        unsigned long *entries;
        unsigned long nr_entries;
@@ -876,14 +906,14 @@ static int stack_print(struct seq_file *m, void *v)
 
        nr_entries = stack_record->size;
        entries = stack_record->entries;
-       stack_count = refcount_read(&stack_record->count) - 1;
+       nr_base_pages = refcount_read(&stack_record->count) - 1;
 
-       if (stack_count < 1 || stack_count < page_owner_stack_threshold)
+       if (nr_base_pages < 1 || nr_base_pages < page_owner_pages_threshold)
                return 0;
 
        for (i = 0; i < nr_entries; i++)
                seq_printf(m, " %pS\n", (void *)entries[i]);
-       seq_printf(m, "stack_count: %d\n\n", stack_count);
+       seq_printf(m, "nr_base_pages: %d\n\n", nr_base_pages);
 
        return 0;
 }
@@ -913,13 +943,13 @@ static const struct file_operations page_owner_stack_operations = {
 
 static int page_owner_threshold_get(void *data, u64 *val)
 {
-       *val = READ_ONCE(page_owner_stack_threshold);
+       *val = READ_ONCE(page_owner_pages_threshold);
        return 0;
 }
 
 static int page_owner_threshold_set(void *data, u64 val)
 {
-       WRITE_ONCE(page_owner_stack_threshold, val);
+       WRITE_ONCE(page_owner_pages_threshold, val);
        return 0;
 }
 
index 0aad0d9a621b80e7a3f758125806bfb64e984c12..94ab99b6b574a461e34bb875fdec497ad24728ce 100644 (file)
@@ -748,12 +748,6 @@ static long shmem_unused_huge_count(struct super_block *sb,
 
 #define shmem_huge SHMEM_HUGE_DENY
 
-bool shmem_is_huge(struct inode *inode, pgoff_t index, bool shmem_huge_force,
-                  struct mm_struct *mm, unsigned long vm_flags)
-{
-       return false;
-}
-
 static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
                struct shrink_control *sc, unsigned long nr_to_split)
 {
index b95c36765d045c0486068362fbd949ab2b1866e8..2243cec18ecc866eb7877ae933828f1eeadc980a 100644 (file)
@@ -3948,7 +3948,7 @@ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface)
 
        spin_lock_bh(&bat_priv->tt.commit_lock);
 
-       while (true) {
+       while (timeout) {
                table_size = batadv_tt_local_table_transmit_size(bat_priv);
                if (packet_size_max >= table_size)
                        break;
index 00e02138003ecefef75714c950056ced5ccd5fda..efea25eb56ce036364c7325916326b687180bbcf 100644 (file)
@@ -105,8 +105,10 @@ void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
        if (hdev->req_status == HCI_REQ_PEND) {
                hdev->req_result = result;
                hdev->req_status = HCI_REQ_DONE;
-               if (skb)
+               if (skb) {
+                       kfree_skb(hdev->req_skb);
                        hdev->req_skb = skb_get(skb);
+               }
                wake_up_interruptible(&hdev->req_wait_q);
        }
 }
index 4ee1b976678b2525ff135fb947221b93923f2aee..703b84bd48d5befc51d787bcd6c04dcbcff61675 100644 (file)
@@ -1946,10 +1946,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
 
        switch (optname) {
        case HCI_DATA_DIR:
-               if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+               if (err)
                        break;
-               }
 
                if (opt)
                        hci_pi(sk)->cmsg_mask |= HCI_CMSG_DIR;
@@ -1958,10 +1957,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
                break;
 
        case HCI_TIME_STAMP:
-               if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+               if (err)
                        break;
-               }
 
                if (opt)
                        hci_pi(sk)->cmsg_mask |= HCI_CMSG_TSTAMP;
@@ -1979,11 +1977,9 @@ static int hci_sock_setsockopt_old(struct socket *sock, int level, int optname,
                        uf.event_mask[1] = *((u32 *) f->event_mask + 1);
                }
 
-               len = min_t(unsigned int, len, sizeof(uf));
-               if (copy_from_sockptr(&uf, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&uf, sizeof(uf), optval, len);
+               if (err)
                        break;
-               }
 
                if (!capable(CAP_NET_RAW)) {
                        uf.type_mask &= hci_sec_filter.type_mask;
@@ -2042,10 +2038,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname,
                        goto done;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(opt))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, len);
+               if (err)
                        break;
-               }
 
                hci_pi(sk)->mtu = opt;
                break;
index 8fe02921adf15d4b968be310415bf9383ae3d63d..c5d8799046ccffbf798e6f47ffaef3dddcb364ca 100644 (file)
@@ -2814,8 +2814,8 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
                                if (qos->bcast.in.phy & BT_ISO_PHY_CODED) {
                                        cp->scanning_phys |= LE_SCAN_PHY_CODED;
                                        hci_le_scan_phy_params(phy, type,
-                                                              interval,
-                                                              window);
+                                                              interval * 3,
+                                                              window * 3);
                                        num_phy++;
                                        phy++;
                                }
@@ -2835,7 +2835,7 @@ static int hci_le_set_ext_scan_param_sync(struct hci_dev *hdev, u8 type,
 
        if (scan_coded(hdev)) {
                cp->scanning_phys |= LE_SCAN_PHY_CODED;
-               hci_le_scan_phy_params(phy, type, interval, window);
+               hci_le_scan_phy_params(phy, type, interval * 3, window * 3);
                num_phy++;
                phy++;
        }
index c8793e57f4b547d5bd465b80575143083b867624..ef0cc80b4c0cc1ff4043d05c05fc0c429a64a6c2 100644 (file)
@@ -1451,8 +1451,8 @@ static bool check_ucast_qos(struct bt_iso_qos *qos)
 
 static bool check_bcast_qos(struct bt_iso_qos *qos)
 {
-       if (qos->bcast.sync_factor == 0x00)
-               return false;
+       if (!qos->bcast.sync_factor)
+               qos->bcast.sync_factor = 0x01;
 
        if (qos->bcast.packing > 0x01)
                return false;
@@ -1475,6 +1475,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
        if (qos->bcast.skip > 0x01f3)
                return false;
 
+       if (!qos->bcast.sync_timeout)
+               qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
        if (qos->bcast.sync_timeout < 0x000a || qos->bcast.sync_timeout > 0x4000)
                return false;
 
@@ -1484,6 +1487,9 @@ static bool check_bcast_qos(struct bt_iso_qos *qos)
        if (qos->bcast.mse > 0x1f)
                return false;
 
+       if (!qos->bcast.timeout)
+               qos->bcast.sync_timeout = BT_ISO_SYNC_TIMEOUT;
+
        if (qos->bcast.timeout < 0x000a || qos->bcast.timeout > 0x4000)
                return false;
 
@@ -1494,7 +1500,7 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                               sockptr_t optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
-       int len, err = 0;
+       int err = 0;
        struct bt_iso_qos qos = default_qos;
        u32 opt;
 
@@ -1509,10 +1515,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -1521,10 +1526,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                break;
 
        case BT_PKT_STATUS:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -1539,17 +1543,9 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               len = min_t(unsigned int, sizeof(qos), optlen);
-
-               if (copy_from_sockptr(&qos, optval, len)) {
-                       err = -EFAULT;
-                       break;
-               }
-
-               if (len == sizeof(qos.ucast) && !check_ucast_qos(&qos)) {
-                       err = -EINVAL;
+               err = bt_copy_from_sockptr(&qos, sizeof(qos), optval, optlen);
+               if (err)
                        break;
-               }
 
                iso_pi(sk)->qos = qos;
                iso_pi(sk)->qos_user_set = true;
@@ -1564,18 +1560,16 @@ static int iso_sock_setsockopt(struct socket *sock, int level, int optname,
                }
 
                if (optlen > sizeof(iso_pi(sk)->base)) {
-                       err = -EOVERFLOW;
+                       err = -EINVAL;
                        break;
                }
 
-               len = min_t(unsigned int, sizeof(iso_pi(sk)->base), optlen);
-
-               if (copy_from_sockptr(iso_pi(sk)->base, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(iso_pi(sk)->base, optlen, optval,
+                                          optlen);
+               if (err)
                        break;
-               }
 
-               iso_pi(sk)->base_len = len;
+               iso_pi(sk)->base_len = optlen;
 
                break;
 
index 467b242d8be071da16bd48d04e1520ce1e1aa8a6..dc089740879363dd0d6d973dcdb4fc05cfc7070a 100644 (file)
@@ -4054,8 +4054,7 @@ static int l2cap_connect_req(struct l2cap_conn *conn,
                return -EPROTO;
 
        hci_dev_lock(hdev);
-       if (hci_dev_test_flag(hdev, HCI_MGMT) &&
-           !test_and_set_bit(HCI_CONN_MGMT_CONNECTED, &hcon->flags))
+       if (hci_dev_test_flag(hdev, HCI_MGMT))
                mgmt_device_connected(hdev, hcon, NULL, 0);
        hci_dev_unlock(hdev);
 
index 4287aa6cc988e3ce34849d1f317be8fd8645832c..e7d810b23082f5ffd8ea4b506366b2684f2e1ece 100644 (file)
@@ -727,7 +727,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
        struct sock *sk = sock->sk;
        struct l2cap_chan *chan = l2cap_pi(sk)->chan;
        struct l2cap_options opts;
-       int len, err = 0;
+       int err = 0;
        u32 opt;
 
        BT_DBG("sk %p", sk);
@@ -754,11 +754,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
                opts.max_tx   = chan->max_tx;
                opts.txwin_size = chan->tx_win;
 
-               len = min_t(unsigned int, sizeof(opts), optlen);
-               if (copy_from_sockptr(&opts, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opts, sizeof(opts), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opts.txwin_size > L2CAP_DEFAULT_EXT_WINDOW) {
                        err = -EINVAL;
@@ -801,10 +799,9 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
                break;
 
        case L2CAP_LM:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt & L2CAP_LM_FIPS) {
                        err = -EINVAL;
@@ -885,7 +882,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
        struct bt_security sec;
        struct bt_power pwr;
        struct l2cap_conn *conn;
-       int len, err = 0;
+       int err = 0;
        u32 opt;
        u16 mtu;
        u8 mode;
@@ -911,11 +908,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
 
                sec.level = BT_SECURITY_LOW;
 
-               len = min_t(unsigned int, sizeof(sec), optlen);
-               if (copy_from_sockptr(&sec, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (sec.level < BT_SECURITY_LOW ||
                    sec.level > BT_SECURITY_FIPS) {
@@ -960,10 +955,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt) {
                        set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -975,10 +969,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                break;
 
        case BT_FLUSHABLE:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt > BT_FLUSHABLE_ON) {
                        err = -EINVAL;
@@ -1010,11 +1003,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
 
                pwr.force_active = BT_POWER_FORCE_ACTIVE_ON;
 
-               len = min_t(unsigned int, sizeof(pwr), optlen);
-               if (copy_from_sockptr(&pwr, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&pwr, sizeof(pwr), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (pwr.force_active)
                        set_bit(FLAG_FORCE_ACTIVE, &chan->flags);
@@ -1023,10 +1014,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                break;
 
        case BT_CHANNEL_POLICY:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                err = -EOPNOTSUPP;
                break;
@@ -1055,10 +1045,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&mtu, optval, sizeof(u16))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&mtu, sizeof(mtu), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (chan->mode == L2CAP_MODE_EXT_FLOWCTL &&
                    sk->sk_state == BT_CONNECTED)
@@ -1086,10 +1075,9 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&mode, optval, sizeof(u8))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&mode, sizeof(mode), optval, optlen);
+               if (err)
                        break;
-               }
 
                BT_DBG("mode %u", mode);
 
index b54e8a530f55a1ff9547a2a5546db34059ebd672..29aa07e9db9d7122bac6ac0c6dfcd76765f11cb8 100644 (file)
@@ -629,7 +629,7 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname,
 
        switch (optname) {
        case RFCOMM_LM:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
+               if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) {
                        err = -EFAULT;
                        break;
                }
@@ -664,7 +664,6 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
        struct sock *sk = sock->sk;
        struct bt_security sec;
        int err = 0;
-       size_t len;
        u32 opt;
 
        BT_DBG("sk %p", sk);
@@ -686,11 +685,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
 
                sec.level = BT_SECURITY_LOW;
 
-               len = min_t(unsigned int, sizeof(sec), optlen);
-               if (copy_from_sockptr(&sec, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (sec.level > BT_SECURITY_HIGH) {
                        err = -EINVAL;
@@ -706,10 +703,9 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
index 43daf965a01e4ac5c9329150080b00dcd63c7e1c..368e026f4d15ca4711737af941ad30c7b48b827f 100644 (file)
@@ -824,7 +824,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
                               sockptr_t optval, unsigned int optlen)
 {
        struct sock *sk = sock->sk;
-       int len, err = 0;
+       int err = 0;
        struct bt_voice voice;
        u32 opt;
        struct bt_codecs *codecs;
@@ -843,10 +843,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags);
@@ -863,11 +862,10 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
 
                voice.setting = sco_pi(sk)->setting;
 
-               len = min_t(unsigned int, sizeof(voice), optlen);
-               if (copy_from_sockptr(&voice, optval, len)) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&voice, sizeof(voice), optval,
+                                          optlen);
+               if (err)
                        break;
-               }
 
                /* Explicitly check for these values */
                if (voice.setting != BT_VOICE_TRANSPARENT &&
@@ -890,10 +888,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
                break;
 
        case BT_PKT_STATUS:
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt)
                        set_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags);
@@ -934,9 +931,9 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(buffer, optval, optlen)) {
+               err = bt_copy_from_sockptr(buffer, optlen, optval, optlen);
+               if (err) {
                        hci_dev_put(hdev);
-                       err = -EFAULT;
                        break;
                }
 
index f21097e734827891f87adb9d0a1f7cebf9f15380..ceaa5a89b947fc574ee2a05003db3de7cc9797b1 100644 (file)
@@ -30,7 +30,7 @@ br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
        return netif_receive_skb(skb);
 }
 
-static int br_pass_frame_up(struct sk_buff *skb)
+static int br_pass_frame_up(struct sk_buff *skb, bool promisc)
 {
        struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
        struct net_bridge *br = netdev_priv(brdev);
@@ -65,6 +65,8 @@ static int br_pass_frame_up(struct sk_buff *skb)
        br_multicast_count(br, NULL, skb, br_multicast_igmp_type(skb),
                           BR_MCAST_DIR_TX);
 
+       BR_INPUT_SKB_CB(skb)->promisc = promisc;
+
        return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
                       dev_net(indev), NULL, skb, indev, NULL,
                       br_netif_receive_skb);
@@ -82,6 +84,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        struct net_bridge_mcast *brmctx;
        struct net_bridge_vlan *vlan;
        struct net_bridge *br;
+       bool promisc;
        u16 vid = 0;
        u8 state;
 
@@ -137,7 +140,9 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        if (p->flags & BR_LEARNING)
                br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, 0);
 
-       local_rcv = !!(br->dev->flags & IFF_PROMISC);
+       promisc = !!(br->dev->flags & IFF_PROMISC);
+       local_rcv = promisc;
+
        if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) {
                /* by definition the broadcast is also a multicast address */
                if (is_broadcast_ether_addr(eth_hdr(skb)->h_dest)) {
@@ -200,7 +205,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
                unsigned long now = jiffies;
 
                if (test_bit(BR_FDB_LOCAL, &dst->flags))
-                       return br_pass_frame_up(skb);
+                       return br_pass_frame_up(skb, false);
 
                if (now != dst->used)
                        dst->used = now;
@@ -213,7 +218,7 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb
        }
 
        if (local_rcv)
-               return br_pass_frame_up(skb);
+               return br_pass_frame_up(skb, promisc);
 
 out:
        return 0;
@@ -386,6 +391,8 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
                                goto forward;
                }
 
+               BR_INPUT_SKB_CB(skb)->promisc = false;
+
                /* The else clause should be hit when nf_hook():
                 *   - returns < 0 (drop/error)
                 *   - returns = 0 (stolen/nf_queue)
index 35e10c5a766d550e0c5cb85cf5a0c4835b52a89d..22e35623c148ac41056d7c24e3996227726ec1a6 100644 (file)
@@ -600,11 +600,17 @@ static unsigned int br_nf_local_in(void *priv,
                                   struct sk_buff *skb,
                                   const struct nf_hook_state *state)
 {
+       bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
        struct nf_conntrack *nfct = skb_nfct(skb);
        const struct nf_ct_hook *ct_hook;
        struct nf_conn *ct;
        int ret;
 
+       if (promisc) {
+               nf_reset_ct(skb);
+               return NF_ACCEPT;
+       }
+
        if (!nfct || skb->pkt_type == PACKET_HOST)
                return NF_ACCEPT;
 
index 86ea5e6689b5ce49a4b71b383893d2ef5b53d110..d4bedc87b1d8f1bcf96c714fc80078227470550a 100644 (file)
@@ -589,6 +589,7 @@ struct br_input_skb_cb {
 #endif
        u8 proxyarp_replied:1;
        u8 src_port_isolated:1;
+       u8 promisc:1;
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
        u8 vlan_filtered:1;
 #endif
index 6f877e31709bad3646ea15bf3a96999ed275bdc1..c3c51b9a68265b443326432274e7fd75675e0e28 100644 (file)
@@ -294,18 +294,24 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
 static unsigned int nf_ct_bridge_in(void *priv, struct sk_buff *skb,
                                    const struct nf_hook_state *state)
 {
-       enum ip_conntrack_info ctinfo;
+       bool promisc = BR_INPUT_SKB_CB(skb)->promisc;
+       struct nf_conntrack *nfct = skb_nfct(skb);
        struct nf_conn *ct;
 
-       if (skb->pkt_type == PACKET_HOST)
+       if (promisc) {
+               nf_reset_ct(skb);
+               return NF_ACCEPT;
+       }
+
+       if (!nfct || skb->pkt_type == PACKET_HOST)
                return NF_ACCEPT;
 
        /* nf_conntrack_confirm() cannot handle concurrent clones,
         * this happens for broad/multicast frames with e.g. macvlan on top
         * of the bridge device.
         */
-       ct = nf_ct_get(skb, &ctinfo);
-       if (!ct || nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
+       ct = container_of(nfct, struct nf_conn, ct_general);
+       if (nf_ct_is_confirmed(ct) || nf_ct_is_template(ct))
                return NF_ACCEPT;
 
        /* let inet prerouting call conntrack again */
index 984ff8b9d0e1aa5646a7237a8cf0b0a21c2aa559..331848eca7d3109d8043bba1f99e84d8e46d5507 100644 (file)
@@ -3775,6 +3775,10 @@ no_lock_out:
                return rc;
        }
 
+       if (unlikely(READ_ONCE(q->owner) == smp_processor_id())) {
+               kfree_skb_reason(skb, SKB_DROP_REASON_TC_RECLASSIFY_LOOP);
+               return NET_XMIT_DROP;
+       }
        /*
         * Heuristic to force contended enqueues to serialize on a
         * separate lock before trying to get qdisc main lock.
@@ -3814,7 +3818,9 @@ no_lock_out:
                qdisc_run_end(q);
                rc = NET_XMIT_SUCCESS;
        } else {
+               WRITE_ONCE(q->owner, smp_processor_id());
                rc = dev_qdisc_enqueue(skb, q, &to_free, txq);
+               WRITE_ONCE(q->owner, -1);
                if (qdisc_run_begin(q)) {
                        if (unlikely(contended)) {
                                spin_unlock(&q->busylock);
index 48741352a88a72e0232977cc9f2cf172f45df89b..c484b1c0fc00a79a45a1c3e7fde230ce59cb67a3 100644 (file)
@@ -1050,6 +1050,11 @@ next:
                        e++;
                }
        }
+
+       /* Don't let NLM_DONE coalesce into a message, even if it could.
+        * Some user space expects NLM_DONE in a separate recv().
+        */
+       err = skb->len;
 out:
 
        cb->args[1] = e;
index b150c9929b12e86219a55c77da480e0c538b3449..14365b20f1c5c09964dd7024060116737f22cb63 100644 (file)
@@ -966,6 +966,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
@@ -1266,6 +1268,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
index 487670759578168c5ff53bce6642898fc41936b3..fe89a056eb06c43743b2d7449e59f4e9360ba223 100644 (file)
@@ -1118,6 +1118,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
@@ -1504,6 +1506,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
index c8f76f56dc1653371ca39663f29cc798b062e60d..d36ace160d426f6224f8e692f3b438ae863bb9b9 100644 (file)
@@ -926,13 +926,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
                icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
                peer->rate_last = jiffies;
                ++peer->n_redirects;
-#ifdef CONFIG_IP_ROUTE_VERBOSE
-               if (log_martians &&
+               if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians &&
                    peer->n_redirects == ip_rt_redirect_number)
                        net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
                                             &ip_hdr(skb)->saddr, inet_iif(skb),
                                             &ip_hdr(skb)->daddr, &gw);
-#endif
        }
 out_put_peer:
        inet_putpeer(peer);
index 92db9b474f2bdb0a2efc91ab2be6c83c8a46372d..779aa6ecdd499b6acd3aa8e14d73735f28b94649 100644 (file)
@@ -2091,9 +2091,10 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
                if (ipv6_addr_equal(&ifp->addr, addr)) {
                        if (!dev || ifp->idev->dev == dev ||
                            !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
-                               result = ifp;
-                               in6_ifa_hold(ifp);
-                               break;
+                               if (in6_ifa_hold_safe(ifp)) {
+                                       result = ifp;
+                                       break;
+                               }
                        }
                }
        }
index 7209419cfb0e9c295a3feb5ecd3f9e1720ca16dc..c1f62352a481454a505dcbfafc637f187abcf4e0 100644 (file)
@@ -1385,7 +1385,10 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
             struct nl_info *info, struct netlink_ext_ack *extack)
 {
        struct fib6_table *table = rt->fib6_table;
-       struct fib6_node *fn, *pn = NULL;
+       struct fib6_node *fn;
+#ifdef CONFIG_IPV6_SUBTREES
+       struct fib6_node *pn = NULL;
+#endif
        int err = -ENOMEM;
        int allow_create = 1;
        int replace_required = 0;
@@ -1409,9 +1412,9 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
                goto out;
        }
 
+#ifdef CONFIG_IPV6_SUBTREES
        pn = fn;
 
-#ifdef CONFIG_IPV6_SUBTREES
        if (rt->fib6_src.plen) {
                struct fib6_node *sn;
 
index 636b360311c5365fba2330f6ca2f7f1b6dd1363e..131f7bb2110d3a08244c6da40ff9be45a2be711b 100644 (file)
@@ -1135,6 +1135,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
@@ -1513,6 +1515,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
                return -ENOMEM;
        if (tmp.num_counters == 0)
                return -EINVAL;
+       if ((u64)len < (u64)tmp.size + sizeof(tmp))
+               return -EINVAL;
 
        tmp.name[sizeof(tmp.name)-1] = 0;
 
index 9505f9d188ff257a8ca35f30ee111c2f19805a5a..6eef15648b7b0853fb249288bf4545dca3a2cf85 100644 (file)
@@ -21,7 +21,8 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
                proto = veth->h_vlan_encapsulated_proto;
                break;
        case htons(ETH_P_PPP_SES):
-               proto = nf_flow_pppoe_proto(skb);
+               if (!nf_flow_pppoe_proto(skb, &proto))
+                       return NF_ACCEPT;
                break;
        default:
                proto = skb->protocol;
index e45fade764096182443814e8dcd70700e7956742..5383bed3d3e002661f01468e1a8bef8425e229b4 100644 (file)
@@ -157,7 +157,7 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
                tuple->encap[i].proto = skb->protocol;
                break;
        case htons(ETH_P_PPP_SES):
-               phdr = (struct pppoe_hdr *)skb_mac_header(skb);
+               phdr = (struct pppoe_hdr *)skb_network_header(skb);
                tuple->encap[i].id = ntohs(phdr->sid);
                tuple->encap[i].proto = skb->protocol;
                break;
@@ -273,10 +273,11 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
        return NF_STOLEN;
 }
 
-static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
+static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto,
                                       u32 *offset)
 {
        struct vlan_ethhdr *veth;
+       __be16 inner_proto;
 
        switch (skb->protocol) {
        case htons(ETH_P_8021Q):
@@ -287,7 +288,8 @@ static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
                }
                break;
        case htons(ETH_P_PPP_SES):
-               if (nf_flow_pppoe_proto(skb) == proto) {
+               if (nf_flow_pppoe_proto(skb, &inner_proto) &&
+                   inner_proto == proto) {
                        *offset += PPPOE_SES_HLEN;
                        return true;
                }
@@ -316,7 +318,7 @@ static void nf_flow_encap_pop(struct sk_buff *skb,
                        skb_reset_network_header(skb);
                        break;
                case htons(ETH_P_PPP_SES):
-                       skb->protocol = nf_flow_pppoe_proto(skb);
+                       skb->protocol = __nf_flow_pppoe_proto(skb);
                        skb_pull(skb, PPPOE_SES_HLEN);
                        skb_reset_network_header(skb);
                        break;
index d89d779467197a0846406e0b0ce6938e8a3d404d..167074283ea91dff50a7aa0299a5794bcddeb32a 100644 (file)
@@ -594,6 +594,12 @@ static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
                                  const struct nft_set_iter *iter,
                                  struct nft_elem_priv *elem_priv)
 {
+       struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
+       nft_set_elem_change_active(ctx->net, set, ext);
        nft_setelem_data_deactivate(ctx->net, set, elem_priv);
 
        return 0;
@@ -617,6 +623,7 @@ static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
                if (!nft_set_elem_active(ext, genmask))
                        continue;
 
+               nft_set_elem_change_active(ctx->net, set, ext);
                nft_setelem_data_deactivate(ctx->net, set, catchall->elem);
                break;
        }
@@ -626,6 +633,7 @@ static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
 {
        struct nft_set_iter iter = {
                .genmask        = nft_genmask_next(ctx->net),
+               .type           = NFT_ITER_UPDATE,
                .fn             = nft_mapelem_deactivate,
        };
 
@@ -3060,7 +3068,7 @@ static const struct nft_expr_type *__nft_expr_type_get(u8 family,
 {
        const struct nft_expr_type *type, *candidate = NULL;
 
-       list_for_each_entry(type, &nf_tables_expressions, list) {
+       list_for_each_entry_rcu(type, &nf_tables_expressions, list) {
                if (!nla_strcmp(nla, type->name)) {
                        if (!type->family && !candidate)
                                candidate = type;
@@ -3092,9 +3100,13 @@ static const struct nft_expr_type *nft_expr_type_get(struct net *net,
        if (nla == NULL)
                return ERR_PTR(-EINVAL);
 
+       rcu_read_lock();
        type = __nft_expr_type_get(family, nla);
-       if (type != NULL && try_module_get(type->owner))
+       if (type != NULL && try_module_get(type->owner)) {
+               rcu_read_unlock();
                return type;
+       }
+       rcu_read_unlock();
 
        lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
@@ -3875,6 +3887,9 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
        const struct nft_data *data;
        int err;
 
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
            *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
                return 0;
@@ -3898,17 +3913,20 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
 
 int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
 {
-       u8 genmask = nft_genmask_next(ctx->net);
+       struct nft_set_iter dummy_iter = {
+               .genmask        = nft_genmask_next(ctx->net),
+       };
        struct nft_set_elem_catchall *catchall;
+
        struct nft_set_ext *ext;
        int ret = 0;
 
        list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
                ext = nft_set_elem_ext(set, catchall->elem);
-               if (!nft_set_elem_active(ext, genmask))
+               if (!nft_set_elem_active(ext, dummy_iter.genmask))
                        continue;
 
-               ret = nft_setelem_validate(ctx, set, NULL, catchall->elem);
+               ret = nft_setelem_validate(ctx, set, &dummy_iter, catchall->elem);
                if (ret < 0)
                        return ret;
        }
@@ -5397,6 +5415,11 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx,
                                        const struct nft_set_iter *iter,
                                        struct nft_elem_priv *elem_priv)
 {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        return nft_setelem_data_validate(ctx, set, elem_priv);
 }
 
@@ -5441,6 +5464,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
                }
 
                iter.genmask    = nft_genmask_next(ctx->net);
+               iter.type       = NFT_ITER_UPDATE;
                iter.skip       = 0;
                iter.count      = 0;
                iter.err        = 0;
@@ -5488,6 +5512,13 @@ static int nft_mapelem_activate(const struct nft_ctx *ctx,
                                const struct nft_set_iter *iter,
                                struct nft_elem_priv *elem_priv)
 {
+       struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+
+       /* called from abort path, reverse check to undo changes. */
+       if (nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
+       nft_clear(ctx->net, ext);
        nft_setelem_data_activate(ctx->net, set, elem_priv);
 
        return 0;
@@ -5505,6 +5536,7 @@ static void nft_map_catchall_activate(const struct nft_ctx *ctx,
                if (!nft_set_elem_active(ext, genmask))
                        continue;
 
+               nft_clear(ctx->net, ext);
                nft_setelem_data_activate(ctx->net, set, catchall->elem);
                break;
        }
@@ -5514,6 +5546,7 @@ static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
 {
        struct nft_set_iter iter = {
                .genmask        = nft_genmask_next(ctx->net),
+               .type           = NFT_ITER_UPDATE,
                .fn             = nft_mapelem_activate,
        };
 
@@ -5778,6 +5811,9 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
        const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
        struct nft_set_dump_args *args;
 
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        if (nft_set_elem_expired(ext) || nft_set_elem_is_dead(ext))
                return 0;
 
@@ -5888,6 +5924,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
        args.skb                = skb;
        args.reset              = dump_ctx->reset;
        args.iter.genmask       = nft_genmask_cur(net);
+       args.iter.type          = NFT_ITER_READ;
        args.iter.skip          = cb->args[0];
        args.iter.count         = 0;
        args.iter.err           = 0;
@@ -6627,7 +6664,7 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
        struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
 
        if (nft_setelem_is_catchall(set, elem_priv)) {
-               nft_set_elem_change_active(net, set, ext);
+               nft_clear(net, ext);
        } else {
                set->ops->activate(net, set, elem_priv);
        }
@@ -7186,6 +7223,16 @@ void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
        }
 }
 
+static int nft_setelem_active_next(const struct net *net,
+                                  const struct nft_set *set,
+                                  struct nft_elem_priv *elem_priv)
+{
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
+       u8 genmask = nft_genmask_next(net);
+
+       return nft_set_elem_active(ext, genmask);
+}
+
 static void nft_setelem_data_activate(const struct net *net,
                                      const struct nft_set *set,
                                      struct nft_elem_priv *elem_priv)
@@ -7309,8 +7356,12 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
                             const struct nft_set_iter *iter,
                             struct nft_elem_priv *elem_priv)
 {
+       const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
        struct nft_trans *trans;
 
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
                                    sizeof(struct nft_trans_elem), GFP_ATOMIC);
        if (!trans)
@@ -7372,6 +7423,7 @@ static int nft_set_flush(struct nft_ctx *ctx, struct nft_set *set, u8 genmask)
 {
        struct nft_set_iter iter = {
                .genmask        = genmask,
+               .type           = NFT_ITER_UPDATE,
                .fn             = nft_setelem_flush,
        };
 
@@ -7607,7 +7659,7 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
 {
        const struct nft_object_type *type;
 
-       list_for_each_entry(type, &nf_tables_objects, list) {
+       list_for_each_entry_rcu(type, &nf_tables_objects, list) {
                if (type->family != NFPROTO_UNSPEC &&
                    type->family != family)
                        continue;
@@ -7623,9 +7675,13 @@ nft_obj_type_get(struct net *net, u32 objtype, u8 family)
 {
        const struct nft_object_type *type;
 
+       rcu_read_lock();
        type = __nft_obj_type_get(objtype, family);
-       if (type != NULL && try_module_get(type->owner))
+       if (type != NULL && try_module_get(type->owner)) {
+               rcu_read_unlock();
                return type;
+       }
+       rcu_read_unlock();
 
        lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
@@ -10598,8 +10654,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                case NFT_MSG_DESTROYSETELEM:
                        te = (struct nft_trans_elem *)trans->data;
 
-                       nft_setelem_data_activate(net, te->set, te->elem_priv);
-                       nft_setelem_activate(net, te->set, te->elem_priv);
+                       if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
+                               nft_setelem_data_activate(net, te->set, te->elem_priv);
+                               nft_setelem_activate(net, te->set, te->elem_priv);
+                       }
                        if (!nft_setelem_is_catchall(te->set, te->elem_priv))
                                te->set->ndeact--;
 
@@ -10787,6 +10845,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
 {
        const struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
 
+       if (!nft_set_elem_active(ext, iter->genmask))
+               return 0;
+
        if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
            *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
                return 0;
@@ -10871,6 +10932,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
                                continue;
 
                        iter.genmask    = nft_genmask_next(ctx->net);
+                       iter.type       = NFT_ITER_UPDATE;
                        iter.skip       = 0;
                        iter.count      = 0;
                        iter.err        = 0;
index a0055f510e31e9b77526a11c66c565b973897706..b314ca728a2912da717995840ef3dc337eace815 100644 (file)
@@ -216,6 +216,7 @@ static int nft_lookup_validate(const struct nft_ctx *ctx,
                return 0;
 
        iter.genmask    = nft_genmask_next(ctx->net);
+       iter.type       = NFT_ITER_UPDATE;
        iter.skip       = 0;
        iter.count      = 0;
        iter.err        = 0;
index 32df7a16835da3e1d850d34a8236e0a45f06f026..1caa04619dc6da37f845acc65c8ca86c173096de 100644 (file)
@@ -172,7 +172,7 @@ static void nft_bitmap_activate(const struct net *net,
        nft_bitmap_location(set, nft_set_ext_key(&be->ext), &idx, &off);
        /* Enter 11 state. */
        priv->bitmap[idx] |= (genmask << off);
-       nft_set_elem_change_active(net, set, &be->ext);
+       nft_clear(net, &be->ext);
 }
 
 static void nft_bitmap_flush(const struct net *net,
@@ -222,8 +222,6 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
        list_for_each_entry_rcu(be, &priv->list, head) {
                if (iter->count < iter->skip)
                        goto cont;
-               if (!nft_set_elem_active(&be->ext, iter->genmask))
-                       goto cont;
 
                iter->err = iter->fn(ctx, set, iter, &be->priv);
 
index 6968a3b342367c6c0cb0df7523fdfd5864038802..daa56dda737ae2e6b4727c2d3930d68e58a33efb 100644 (file)
@@ -199,7 +199,7 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
 {
        struct nft_rhash_elem *he = nft_elem_priv_cast(elem_priv);
 
-       nft_set_elem_change_active(net, set, &he->ext);
+       nft_clear(net, &he->ext);
 }
 
 static void nft_rhash_flush(const struct net *net,
@@ -286,8 +286,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (!nft_set_elem_active(&he->ext, iter->genmask))
-                       goto cont;
 
                iter->err = iter->fn(ctx, set, iter, &he->priv);
                if (iter->err < 0)
@@ -599,7 +597,7 @@ static void nft_hash_activate(const struct net *net, const struct nft_set *set,
 {
        struct nft_hash_elem *he = nft_elem_priv_cast(elem_priv);
 
-       nft_set_elem_change_active(net, set, &he->ext);
+       nft_clear(net, &he->ext);
 }
 
 static void nft_hash_flush(const struct net *net,
@@ -652,8 +650,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
                hlist_for_each_entry_rcu(he, &priv->table[i], node) {
                        if (iter->count < iter->skip)
                                goto cont;
-                       if (!nft_set_elem_active(&he->ext, iter->genmask))
-                               goto cont;
 
                        iter->err = iter->fn(ctx, set, iter, &he->priv);
                        if (iter->err < 0)
index df8de50902463738642d4d24b59f12b17b5ff726..187138afac45d479f89ea23ec9b09fcd6b6da866 100644 (file)
@@ -1847,7 +1847,7 @@ static void nft_pipapo_activate(const struct net *net,
 {
        struct nft_pipapo_elem *e = nft_elem_priv_cast(elem_priv);
 
-       nft_set_elem_change_active(net, set, &e->ext);
+       nft_clear(net, &e->ext);
 }
 
 /**
@@ -2077,6 +2077,8 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
                rules_fx = rules_f0;
 
                nft_pipapo_for_each_field(f, i, m) {
+                       bool last = i == m->field_count - 1;
+
                        if (!pipapo_match_field(f, start, rules_fx,
                                                match_start, match_end))
                                break;
@@ -2089,16 +2091,18 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
 
                        match_start += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
                        match_end += NFT_PIPAPO_GROUPS_PADDED_SIZE(f);
-               }
 
-               if (i == m->field_count) {
-                       priv->dirty = true;
-                       pipapo_drop(m, rulemap);
-                       return;
+                       if (last && f->mt[rulemap[i].to].e == e) {
+                               priv->dirty = true;
+                               pipapo_drop(m, rulemap);
+                               return;
+                       }
                }
 
                first_rule += rules_f0;
        }
+
+       WARN_ON_ONCE(1); /* elem_priv not found */
 }
 
 /**
@@ -2115,13 +2119,15 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
                            struct nft_set_iter *iter)
 {
        struct nft_pipapo *priv = nft_set_priv(set);
-       struct net *net = read_pnet(&set->net);
        const struct nft_pipapo_match *m;
        const struct nft_pipapo_field *f;
        unsigned int i, r;
 
+       WARN_ON_ONCE(iter->type != NFT_ITER_READ &&
+                    iter->type != NFT_ITER_UPDATE);
+
        rcu_read_lock();
-       if (iter->genmask == nft_genmask_cur(net))
+       if (iter->type == NFT_ITER_READ)
                m = rcu_dereference(priv->match);
        else
                m = priv->clone;
@@ -2143,9 +2149,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
 
                e = f->mt[r].e;
 
-               if (!nft_set_elem_active(&e->ext, iter->genmask))
-                       goto cont;
-
                iter->err = iter->fn(ctx, set, iter, &e->priv);
                if (iter->err < 0)
                        goto out;
index 9944fe479e5361dc140f75be8b90bf3c5deb40f6..b7ea21327549b353c087b3e607e722f391ea94c1 100644 (file)
@@ -532,7 +532,7 @@ static void nft_rbtree_activate(const struct net *net,
 {
        struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
 
-       nft_set_elem_change_active(net, set, &rbe->ext);
+       nft_clear(net, &rbe->ext);
 }
 
 static void nft_rbtree_flush(const struct net *net,
@@ -600,8 +600,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
 
                if (iter->count < iter->skip)
                        goto cont;
-               if (!nft_set_elem_active(&rbe->ext, iter->genmask))
-                       goto cont;
 
                iter->err = iter->fn(ctx, set, iter, &rbe->priv);
                if (iter->err < 0) {
index 819157bbb5a2c6ef775633931721490b747f2fc8..d5344563e525c9bc436d5ad0b84380f0bcae62a8 100644 (file)
@@ -252,10 +252,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = copy_safe_from_sockptr(&opt, sizeof(opt),
+                                            optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt > LLCP_MAX_RW) {
                        err = -EINVAL;
@@ -274,10 +274,10 @@ static int nfc_llcp_setsockopt(struct socket *sock, int level, int optname,
                        break;
                }
 
-               if (copy_from_sockptr(&opt, optval, sizeof(u32))) {
-                       err = -EFAULT;
+               err = copy_safe_from_sockptr(&opt, sizeof(opt),
+                                            optval, optlen);
+               if (err)
                        break;
-               }
 
                if (opt > LLCP_MAX_MIUX) {
                        err = -EINVAL;
index 3019a4406ca4f72be806ff922e377ea7609c3934..74b63cdb59923a95dd03a9c2c540af702564873a 100644 (file)
@@ -1380,8 +1380,9 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
        if (ct_info.timeout[0]) {
                if (nf_ct_set_timeout(net, ct_info.ct, family, key->ip.proto,
                                      ct_info.timeout))
-                       pr_info_ratelimited("Failed to associated timeout "
-                                           "policy `%s'\n", ct_info.timeout);
+                       OVS_NLERR(log,
+                                 "Failed to associated timeout policy '%s'",
+                                 ct_info.timeout);
                else
                        ct_info.nf_ct_timeout = rcu_dereference(
                                nf_ct_timeout_find(ct_info.ct)->timeout);
index ff5336493777507242320d7e9214c637663f0734..4a2c763e2d116693469e6c8bd9ce0ed8f7f667d9 100644 (file)
@@ -974,6 +974,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
        sch->enqueue = ops->enqueue;
        sch->dequeue = ops->dequeue;
        sch->dev_queue = dev_queue;
+       sch->owner = -1;
        netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL);
        refcount_set(&sch->refcnt, 1);
 
index 5b41e2321209ae0a17ac97d7214eefd252ec0180..9a6ad5974dff5e855cbc0ba2a1f7837733420c5f 100644 (file)
@@ -2663,9 +2663,13 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk,
                                        WRITE_ONCE(u->oob_skb, NULL);
                                        consume_skb(skb);
                                }
-                       } else if (!(flags & MSG_PEEK)) {
+                       } else if (flags & MSG_PEEK) {
+                               skb = NULL;
+                       } else {
                                skb_unlink(skb, &sk->sk_receive_queue);
-                               consume_skb(skb);
+                               WRITE_ONCE(u->oob_skb, NULL);
+                               if (!WARN_ON_ONCE(skb_unref(skb)))
+                                       kfree_skb(skb);
                                skb = skb_peek(&sk->sk_receive_queue);
                        }
                }
@@ -2739,18 +2743,16 @@ redo:
                last = skb = skb_peek(&sk->sk_receive_queue);
                last_len = last ? last->len : 0;
 
+again:
 #if IS_ENABLED(CONFIG_AF_UNIX_OOB)
                if (skb) {
                        skb = manage_oob(skb, sk, flags, copied);
-                       if (!skb) {
+                       if (!skb && copied) {
                                unix_state_unlock(sk);
-                               if (copied)
-                                       break;
-                               goto redo;
+                               break;
                        }
                }
 #endif
-again:
                if (skb == NULL) {
                        if (copied >= target)
                                goto unlock;
index fa39b626523851df29275f1448d30a7390e7e0fb..6433a414acf8624a1d98727f4e309b7c040710b9 100644 (file)
@@ -274,11 +274,22 @@ static void __unix_gc(struct work_struct *work)
         * receive queues.  Other, non candidate sockets _can_ be
         * added to queue, so we must make sure only to touch
         * candidates.
+        *
+        * Embryos, though never candidates themselves, affect which
+        * candidates are reachable by the garbage collector.  Before
+        * being added to a listener's queue, an embryo may already
+        * receive data carrying SCM_RIGHTS, potentially making the
+        * passed socket a candidate that is not yet reachable by the
+        * collector.  It becomes reachable once the embryo is
+        * enqueued.  Therefore, we must ensure that no SCM-laden
+        * embryo appears in a (candidate) listener's queue between
+        * consecutive scan_children() calls.
         */
        list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
+               struct sock *sk = &u->sk;
                long total_refs;
 
-               total_refs = file_count(u->sk.sk_socket->file);
+               total_refs = file_count(sk->sk_socket->file);
 
                WARN_ON_ONCE(!u->inflight);
                WARN_ON_ONCE(total_refs < u->inflight);
@@ -286,6 +297,11 @@ static void __unix_gc(struct work_struct *work)
                        list_move_tail(&u->link, &gc_candidates);
                        __set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
                        __set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+
+                       if (sk->sk_state == TCP_LISTEN) {
+                               unix_state_lock(sk);
+                               unix_state_unlock(sk);
+                       }
                }
        }
 
index 3404d076a8a3e6a9f43dfca301d3e00078afb934..727aa20be4bde8dc63a544a44a5cdeb19cac7dcb 100644 (file)
@@ -1417,6 +1417,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
                struct xsk_queue **q;
                int entries;
 
+               if (optlen < sizeof(entries))
+                       return -EINVAL;
                if (copy_from_sockptr(&entries, optval, sizeof(entries)))
                        return -EFAULT;
 
index c5c2ce113c9232c331c4ebac2ba4384a24424640..d20c47d21ad8352973c93ccefc7b0931e93de545 100644 (file)
@@ -467,6 +467,8 @@ static bool stackleak_gate(void)
                        return false;
                if (STRING_EQUAL(section, ".entry.text"))
                        return false;
+               if (STRING_EQUAL(section, ".head.text"))
+                       return false;
        }
 
        return track_frame_size >= 0;
index b141024830ecc831430c697858ce409a56c711af..ee6ac649df836d695133f93da9cfd0518cc1ae23 100644 (file)
@@ -428,7 +428,7 @@ static int cvt_ump_midi2_to_midi1(struct snd_seq_client *dest,
        midi1->note.group = midi2->note.group;
        midi1->note.status = midi2->note.status;
        midi1->note.channel = midi2->note.channel;
-       switch (midi2->note.status << 4) {
+       switch (midi2->note.status) {
        case UMP_MSG_STATUS_NOTE_ON:
        case UMP_MSG_STATUS_NOTE_OFF:
                midi1->note.note = midi2->note.note;
index cdcb28aa9d7bf028d429aeea0016cec7c6bc0c22..70d80b6af3fe370aa9c6f9fd471676ba9c18da72 100644 (file)
@@ -7467,6 +7467,10 @@ enum {
        ALC285_FIXUP_CS35L56_I2C_2,
        ALC285_FIXUP_CS35L56_I2C_4,
        ALC285_FIXUP_ASUS_GA403U,
+       ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC,
+       ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1,
+       ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+       ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1
 };
 
 /* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9690,6 +9694,38 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc285_fixup_asus_ga403u,
        },
+       [ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x03a11050 },
+                       { 0x1b, 0x03a11c30 },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1
+       },
+       [ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc285_fixup_speaker2_to_dac1,
+               .chained = true,
+               .chain_id = ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC,
+       },
+       [ALC285_FIXUP_ASUS_GU605_SPI_2_HEADSET_MIC] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x03a11050 },
+                       { 0x1b, 0x03a11c30 },
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC285_FIXUP_CS35L56_SPI_2
+       },
+       [ALC285_FIXUP_ASUS_GA403U_I2C_SPEAKER2_TO_DAC1] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc285_fixup_speaker2_to_dac1,
+               .chained = true,
+               .chain_id = ALC285_FIXUP_ASUS_GA403U,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -10084,6 +10120,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8cdd, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x103c, 0x8cde, "HP Spectre", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
@@ -10143,7 +10181,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
-       SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U),
+       SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10151,7 +10189,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
-       SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_CS35L56_SPI_2),
+       SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_ASUS_GU605_SPI_SPEAKER2_TO_DAC1),
        SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
        SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
@@ -10228,6 +10266,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x152d, 0x1082, "Quanta NL3", ALC269_FIXUP_LIFEBOOK),
+       SND_PCI_QUIRK(0x152d, 0x1262, "Huawei NBLB-WAX9N", ALC2XX_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1558, 0x0353, "Clevo V35[05]SN[CDE]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x1323, "Clevo N130ZU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x1325, "Clevo N15[01][CW]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -10333,6 +10372,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x222e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2231, "Thinkpad T560", ALC292_FIXUP_TPT460),
        SND_PCI_QUIRK(0x17aa, 0x2233, "Thinkpad", ALC292_FIXUP_TPT460),
+       SND_PCI_QUIRK(0x17aa, 0x2234, "Thinkpad ICE-1", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x2245, "Thinkpad T470", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2246, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
        SND_PCI_QUIRK(0x17aa, 0x2247, "Thinkpad", ALC298_FIXUP_TPT470_DOCK),
@@ -10394,8 +10434,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x3886, "Y780 VECO DUAL", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x38a7, "Y780P AMD YG dual", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x38a8, "Y780P AMD VECO dual", ALC287_FIXUP_TAS2781_I2C),
-       SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
-       SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x17aa, 0x38a9, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
+       SND_PCI_QUIRK(0x17aa, 0x38ab, "Thinkbook 16P", ALC287_FIXUP_MG_RTKC_CSAMP_CS35L41_I2C_THINKPAD),
        SND_PCI_QUIRK(0x17aa, 0x38b4, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x17aa, 0x38b5, "Legion Slim 7 16IRH8", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x17aa, 0x38b6, "Legion Slim 7 16APH8", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10457,6 +10497,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
        SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
        SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
        SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
        SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
index 48dae3339305048fca2262821e5ebfea6bcf237a..75f7674c66ee7ae8d9c407798193f4618b82a6b7 100644 (file)
@@ -514,10 +514,10 @@ static int tas2563_save_calibration(struct tasdevice_priv *tas_priv)
 static void tas2781_apply_calib(struct tasdevice_priv *tas_priv)
 {
        static const unsigned char page_array[CALIB_MAX] = {
-               0x17, 0x18, 0x18, 0x0d, 0x18
+               0x17, 0x18, 0x18, 0x13, 0x18,
        };
        static const unsigned char rgno_array[CALIB_MAX] = {
-               0x74, 0x0c, 0x14, 0x3c, 0x7c
+               0x74, 0x0c, 0x14, 0x70, 0x7c,
        };
        unsigned char *data;
        int i, j, rc;
index 318e2dad27e048c08fea615cef8654aa1fcb7d81..ae57bf69ad4af3aa51fd364e89676469dd87a14d 100644 (file)
@@ -76,6 +76,12 @@ enum {
        DNS
 };
 
+enum {                 /* IP family selectors */
+       IPV4 = 1,       /* first selector; numbering starts at 1, not 0 */
+       IPV6,           /* implicitly 2 */
+       IP_TYPE_MAX     /* implicitly 3: one past the last selector */
+};
+
 static int in_hand_shake;
 
 static char *os_name = "";
@@ -102,6 +108,11 @@ static struct utsname uts_buf;
 
 #define MAX_FILE_NAME 100
 #define ENTRIES_PER_BLOCK 50
+/*
+ * Change this entry if the number of addresses increases in future
+ */
+#define MAX_IP_ENTRIES 64
+#define OUTSTR_BUF_SIZE ((INET6_ADDRSTRLEN + 1) * MAX_IP_ENTRIES)
 
 struct kvp_record {
        char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
@@ -1171,6 +1182,18 @@ static int process_ip_string(FILE *f, char *ip_string, int type)
        return 0;
 }
 
+int ip_version_check(const char *input_addr)  /* classify a textual address: returns IPV4, IPV6 or -EINVAL */
+{
+       struct in6_addr addr;   /* scratch destination for both inet_pton() attempts; never read back */
+
+       if (inet_pton(AF_INET, input_addr, &addr))      /* IPv4 is tried first. NOTE(review): any nonzero result counts as a match - confirm -1 cannot occur here */
+               return IPV4;
+       else if (inet_pton(AF_INET6, input_addr, &addr))
+               return IPV6;
+
+       return -EINVAL;         /* neither attempt matched */
+}
+
 /*
  * Only IPv4 subnet strings need to be converted to plen
  * For IPv6 the subnet is already provided in plen format
@@ -1197,14 +1220,75 @@ static int kvp_subnet_to_plen(char *subnet_addr_str)
        return plen;
 }
 
+static int process_dns_gateway_nm(FILE *f, char *ip_string, int type,  /* type: DNS or GATEWAY, anything else yields -EINVAL */
+                                 int ip_sec)   /* ip_sec: IPV4 or IPV6, the family to keep */
+{      /* Writes "<dns|gateway>=<a>,<b>,..." to f; returns 0 if nothing was written, else the fprintf() result, or -EINVAL/-ENOMEM */
+       char addr[INET6_ADDRSTRLEN], *output_str;       /* NOTE(review): the limit passed to parse_ip_val_buffer() below is MAX_IP_ADDR_SIZE * 2, not sizeof(addr) - confirm it cannot overrun addr */
+       int ip_offset = 0, error = 0, ip_ver;
+       char *param_name;
+
+       if (type == DNS)
+               param_name = "dns";
+       else if (type == GATEWAY)
+               param_name = "gateway";
+       else
+               return -EINVAL;
+
+       output_str = (char *)calloc(OUTSTR_BUF_SIZE, sizeof(char));     /* zero-filled, so it starts out as an empty string */
+       if (!output_str)
+               return -ENOMEM;
+
+       while (1) {
+               memset(addr, 0, sizeof(addr));
+
+               if (!parse_ip_val_buffer(ip_string, &ip_offset, addr,
+                                        (MAX_IP_ADDR_SIZE * 2)))
+                       break;  /* a zero return ends the scan */
+
+               ip_ver = ip_version_check(addr);
+               if (ip_ver < 0)
+                       continue;       /* not a parsable address: skip it */
+
+               if ((ip_ver == IPV4 && ip_sec == IPV4) ||
+                   (ip_ver == IPV6 && ip_sec == IPV6)) {
+                       /*
+                        * Append only if addr, its comma and the NUL still fit; otherwise the address is dropped
+                        */
+                       if ((OUTSTR_BUF_SIZE - strlen(output_str)) >
+                           (strlen(addr) + 1)) {
+                               strncat(output_str, addr,
+                                       OUTSTR_BUF_SIZE -
+                                       strlen(output_str) - 1);
+                               strncat(output_str, ",",
+                                       OUTSTR_BUF_SIZE -
+                                       strlen(output_str) - 1);
+                       }
+               } else {
+                       continue;       /* address belongs to the other family */
+               }
+       }
+
+       if (strlen(output_str)) {
+               /*
+                * Every appended address is followed by a comma, so a
+                * non-empty string ends in one; strip that last comma
+                */
+               output_str[strlen(output_str) - 1] = '\0';
+               error = fprintf(f, "%s=%s\n", param_name, output_str);
+       }
+
+       free(output_str);
+       return error;
+}
+
 static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
-                               int is_ipv6)
+                               int ip_sec)
 {
        char addr[INET6_ADDRSTRLEN];
        char subnet_addr[INET6_ADDRSTRLEN];
-       int error, i = 0;
+       int error = 0, i = 0;
        int ip_offset = 0, subnet_offset = 0;
-       int plen;
+       int plen, ip_ver;
 
        memset(addr, 0, sizeof(addr));
        memset(subnet_addr, 0, sizeof(subnet_addr));
@@ -1216,10 +1300,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
                                                       subnet_addr,
                                                       (MAX_IP_ADDR_SIZE *
                                                        2))) {
-               if (!is_ipv6)
+               ip_ver = ip_version_check(addr);
+               if (ip_ver < 0)
+                       continue;
+
+               if (ip_ver == IPV4 && ip_sec == IPV4)
                        plen = kvp_subnet_to_plen((char *)subnet_addr);
-               else
+               else if (ip_ver == IPV6 && ip_sec == IPV6)
                        plen = atoi(subnet_addr);
+               else
+                       continue;
 
                if (plen < 0)
                        return plen;
@@ -1233,17 +1323,16 @@ static int process_ip_string_nm(FILE *f, char *ip_string, char *subnet,
                memset(subnet_addr, 0, sizeof(subnet_addr));
        }
 
-       return 0;
+       return error;
 }
 
 static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
 {
-       int error = 0;
+       int error = 0, ip_ver;
        char if_filename[PATH_MAX];
        char nm_filename[PATH_MAX];
        FILE *ifcfg_file, *nmfile;
        char cmd[PATH_MAX];
-       int is_ipv6 = 0;
        char *mac_addr;
        int str_len;
 
@@ -1421,52 +1510,94 @@ static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
        if (error)
                goto setval_error;
 
-       if (new_val->addr_family & ADDR_FAMILY_IPV6) {
-               error = fprintf(nmfile, "\n[ipv6]\n");
-               if (error < 0)
-                       goto setval_error;
-               is_ipv6 = 1;
-       } else {
-               error = fprintf(nmfile, "\n[ipv4]\n");
-               if (error < 0)
-                       goto setval_error;
-       }
-
        /*
         * Now we populate the keyfile format
+        *
+        * The keyfile format expects the IPv6 and IPv4 configuration in
+        * different sections. Therefore we iterate through the list twice,
+        * once to populate the IPv4 section and the next time for IPv6
         */
+       ip_ver = IPV4;
+       do {
+               if (ip_ver == IPV4) {
+                       error = fprintf(nmfile, "\n[ipv4]\n");
+                       if (error < 0)
+                               goto setval_error;
+               } else {
+                       error = fprintf(nmfile, "\n[ipv6]\n");
+                       if (error < 0)
+                               goto setval_error;
+               }
 
-       if (new_val->dhcp_enabled) {
-               error = kvp_write_file(nmfile, "method", "", "auto");
-               if (error < 0)
-                       goto setval_error;
-       } else {
-               error = kvp_write_file(nmfile, "method", "", "manual");
+               /*
+                * Write the configuration for ipaddress, netmask, gateway and
+                * name services
+                */
+               error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
+                                            (char *)new_val->sub_net,
+                                            ip_ver);
                if (error < 0)
                        goto setval_error;
-       }
 
-       /*
-        * Write the configuration for ipaddress, netmask, gateway and
-        * name services
-        */
-       error = process_ip_string_nm(nmfile, (char *)new_val->ip_addr,
-                                    (char *)new_val->sub_net, is_ipv6);
-       if (error < 0)
-               goto setval_error;
+               /*
+                * As dhcp_enabled is only valid for ipv4, we do not set dhcp
+                * methods for ipv6 based on dhcp_enabled flag.
+                *
+                * For ipv4, set method to manual only when dhcp_enabled is
+                * false and specific ipv4 addresses are configured. If
+                * dhcp_enabled is false and no ipv4 addresses are configured,
+                * set method to 'disabled'.
+                *
+                * For ipv6, set method to manual when we configure ipv6
+                * addresses. Otherwise set method to 'auto' so that SLAAC from
+                * RA may be used.
+                */
+               if (ip_ver == IPV4) {
+                       if (new_val->dhcp_enabled) {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "auto");
+                               if (error < 0)
+                                       goto setval_error;
+                       } else if (error) {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "manual");
+                               if (error < 0)
+                                       goto setval_error;
+                       } else {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "disabled");
+                               if (error < 0)
+                                       goto setval_error;
+                       }
+               } else if (ip_ver == IPV6) {
+                       if (error) {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "manual");
+                               if (error < 0)
+                                       goto setval_error;
+                       } else {
+                               error = kvp_write_file(nmfile, "method", "",
+                                                      "auto");
+                               if (error < 0)
+                                       goto setval_error;
+                       }
+               }
 
-       /* we do not want ipv4 addresses in ipv6 section and vice versa */
-       if (is_ipv6 != is_ipv4((char *)new_val->gate_way)) {
-               error = fprintf(nmfile, "gateway=%s\n", (char *)new_val->gate_way);
+               error = process_dns_gateway_nm(nmfile,
+                                              (char *)new_val->gate_way,
+                                              GATEWAY, ip_ver);
                if (error < 0)
                        goto setval_error;
-       }
 
-       if (is_ipv6 != is_ipv4((char *)new_val->dns_addr)) {
-               error = fprintf(nmfile, "dns=%s\n", (char *)new_val->dns_addr);
+               error = process_dns_gateway_nm(nmfile,
+                                              (char *)new_val->dns_addr, DNS,
+                                              ip_ver);
                if (error < 0)
                        goto setval_error;
-       }
+
+               ip_ver++;
+       } while (ip_ver < IP_TYPE_MAX);
+
        fclose(nmfile);
        fclose(ifcfg_file);
 
index 4b0673bf52c2e615017bf2b94da1f6fc4392e532..07cfad817d53908f2325505d2b9cb644a808a689 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/build_bug.h>
 #include <linux/compiler.h>
 #include <linux/math.h>
+#include <linux/panic.h>
 #include <endian.h>
 #include <byteswap.h>
 
index f3c82ab5b14cd77819030096b81e0b67cba0df1d..7d73da0980473fd3fdbdcd88e9e041077d5a2df3 100644 (file)
@@ -37,4 +37,9 @@ static inline void totalram_pages_add(long count)
 {
 }
 
+static inline int early_pfn_to_nid(unsigned long pfn)
+{
+       return 0;       /* stub: pfn is ignored and 0 is always returned */
+}
+
 #endif
diff --git a/tools/include/linux/panic.h b/tools/include/linux/panic.h
new file mode 100644 (file)
index 0000000..9c8f17a
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_PANIC_H
+#define _TOOLS_LINUX_PANIC_H
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+static inline void panic(const char *fmt, ...)        /* printf-style: print the message to stderr, then terminate */
+{
+       va_list argp;
+
+       va_start(argp, fmt);
+       vfprintf(stderr, fmt, argp);    /* no newline is appended; fmt must carry its own */
+       va_end(argp);
+       exit(-1);       /* does not return to the caller */
+}
+
+#endif
index 8f08c3fd498d5b81185519728fc1c28a8a0d4d5f..0d3672e5d9ed1553a720f3b0b52ca71f81fbc2d9 100644 (file)
@@ -67,6 +67,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou
 .PP
 \fB--quiet\fP Do not decode and print the system configuration header information.
 .PP
+\fB--no-msr\fP Disable all the uses of the MSR driver.
+.PP
+\fB--no-perf\fP Disable all the uses of the perf API.
+.PP
 \fB--interval seconds\fP overrides the default 5.0 second measurement interval.
 .PP
 \fB--num_iterations num\fP number of the measurement iterations.
@@ -125,9 +129,17 @@ The system configuration dump (if --quiet is not used) is followed by statistics
 .PP
 \fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
 .PP
-\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms or /sys/class/drm/card0/gt/gt0/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
 .PP
-\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt_cur_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBGFXAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt_act_freq_mhz or /sys/class/drm/card0/gt/gt0/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
+.PP
+\fBSAM%mc6\fP The percentage of time the SA Media is in the "module C6" state, mc6, during the measurement interval. From /sys/class/drm/card0/gt/gt1/rc6_residency_ms or /sys/class/drm/card0/device/tile0/gtN/gtidle/idle_residency_ms depending on the graphics driver being used.
+.PP
+\fBSAMMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/cur_freq depending on the graphics driver being used.
+.PP
+\fBSAMAMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/drm/card0/gt/gt1/rps_act_freq_mhz or /sys/class/drm/card0/device/tile0/gtN/freq0/act_freq depending on the graphics driver being used.
 .PP
 \fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states.  These numbers are from hardware residency counters.
 .PP
@@ -370,7 +382,7 @@ below the processor's base frequency.
 
 Busy% = MPERF_delta/TSC_delta
 
-Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval
+Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
 
 Note that these calculations depend on TSC_delta, so they
 are not reliable during intervals when TSC_MHz is not running at the base frequency.
index 7a334377f92b978fa642a0071b19f33d7e6fe74e..98256468e24806acfc0daee374d0cf9877e92131 100644 (file)
@@ -3,7 +3,7 @@
  * turbostat -- show CPU frequency and C-state residency
  * on modern Intel and AMD processors.
  *
- * Copyright (c) 2023 Intel Corporation.
+ * Copyright (c) 2024 Intel Corporation.
  * Len Brown <len.brown@intel.com>
  */
 
@@ -36,6 +36,8 @@
 #include <linux/perf_event.h>
 #include <asm/unistd.h>
 #include <stdbool.h>
+#include <assert.h>
+#include <linux/kernel.h>
 
 #define UNUSED(x) (void)(x)
 
 #define        NAME_BYTES 20
 #define PATH_BYTES 128
 
+#define MAX_NOFILE 0x8000
+
 enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
 enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
 enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
+enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR };
+enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR };
 
 struct msr_counter {
        unsigned int msr_num;
@@ -127,6 +133,9 @@ struct msr_counter bic[] = {
        { 0x0, "IPC", "", 0, 0, 0, NULL, 0 },
        { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 },
        { 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "SAM%mc6", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "SAMMHz", "", 0, 0, 0, NULL, 0 },
+       { 0x0, "SAMAMHz", "", 0, 0, 0, NULL, 0 },
 };
 
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -185,11 +194,14 @@ struct msr_counter bic[] = {
 #define        BIC_IPC         (1ULL << 52)
 #define        BIC_CORE_THROT_CNT      (1ULL << 53)
 #define        BIC_UNCORE_MHZ          (1ULL << 54)
+#define        BIC_SAM_mc6             (1ULL << 55)
+#define        BIC_SAMMHz              (1ULL << 56)
+#define        BIC_SAMACTMHz           (1ULL << 57)
 
 #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
 #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
-#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ)
-#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ)
+#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6)
 #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
 
 #define BIC_DISABLED_BY_DEFAULT        (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
@@ -204,10 +216,13 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC
 #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
 #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
 
+struct amperf_group_fd;
+
 char *proc_stat = "/proc/stat";
 FILE *outf;
 int *fd_percpu;
 int *fd_instr_count_percpu;
+struct amperf_group_fd *fd_amperf_percpu;      /* File descriptors for perf group with APERF and MPERF counters. */
 struct timeval interval_tv = { 5, 0 };
 struct timespec interval_ts = { 5, 0 };
 
@@ -242,11 +257,8 @@ char *output_buffer, *outp;
 unsigned int do_dts;
 unsigned int do_ptm;
 unsigned int do_ipc;
-unsigned long long gfx_cur_rc6_ms;
 unsigned long long cpuidle_cur_cpu_lpi_us;
 unsigned long long cpuidle_cur_sys_lpi_us;
-unsigned int gfx_cur_mhz;
-unsigned int gfx_act_mhz;
 unsigned int tj_max;
 unsigned int tj_max_override;
 double rapl_power_units, rapl_time_units;
@@ -263,6 +275,28 @@ unsigned int has_hwp_epp;  /* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;      /* IA32_HWP_REQUEST_PKG */
 unsigned int first_counter_read = 1;
 int ignore_stdin;
+bool no_msr;
+bool no_perf;
+enum amperf_source amperf_source;
+
+enum gfx_sysfs_idx {   /* slot numbers for the gfx_info[] array */
+       GFX_rc6,
+       GFX_MHz,
+       GFX_ACTMHz,
+       SAM_mc6,
+       SAM_MHz,
+       SAM_ACTMHz,
+       GFX_MAX         /* count of the slots above; used as the gfx_info[] length */
+};
+
+struct gfx_sysfs_info {        /* element type of gfx_info[], one per enum gfx_sysfs_idx slot */
+       const char *path;       /* presumably the sysfs file backing this slot - TODO confirm */
+       FILE *fp;               /* presumably the open stream for path - TODO confirm */
+       unsigned int val;       /* NOTE(review): which slots use val vs val_ull is not visible here */
+       unsigned long long val_ull;
+};
+
+static struct gfx_sysfs_info gfx_info[GFX_MAX];
 
 int get_msr(int cpu, off_t offset, unsigned long long *msr);
 
@@ -652,6 +686,7 @@ static const struct platform_features icx_features = {
        .bclk_freq = BCLK_100MHZ,
        .supported_cstates = CC1 | CC6 | PC2 | PC6,
        .cst_limit = CST_LIMIT_ICX,
+       .has_msr_core_c1_res = 1,
        .has_irtl_msrs = 1,
        .has_cst_prewake_bit = 1,
        .trl_msrs = TRL_BASE | TRL_CORECOUNT,
@@ -948,6 +983,175 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi
 #define MAX_ADDED_THREAD_COUNTERS 24
 #define BITMASK_SIZE 32
 
+/* Indexes used to map data read from perf and MSRs into global variables */
+enum rapl_rci_index {  /* values are spelled out explicitly, 0 through 6 */
+       RAPL_RCI_INDEX_ENERGY_PKG = 0,
+       RAPL_RCI_INDEX_ENERGY_CORES = 1,
+       RAPL_RCI_INDEX_DRAM = 2,
+       RAPL_RCI_INDEX_GFX = 3,
+       RAPL_RCI_INDEX_PKG_PERF_STATUS = 4,
+       RAPL_RCI_INDEX_DRAM_PERF_STATUS = 5,
+       RAPL_RCI_INDEX_CORE_ENERGY = 6,
+       NUM_RAPL_COUNTERS,      /* implicitly 7: count of the indexes above; array length in struct rapl_counter_info_t */
+};
+
+enum rapl_unit {
+       RAPL_UNIT_INVALID,      /* implicitly 0, so zero-initialised storage reads as INVALID */
+       RAPL_UNIT_JOULES,
+       RAPL_UNIT_WATTS,
+};
+
+struct rapl_counter_info_t {   /* every array below has NUM_RAPL_COUNTERS slots - presumably indexed by enum rapl_rci_index, TODO confirm */
+       unsigned long long data[NUM_RAPL_COUNTERS];
+       enum rapl_source source[NUM_RAPL_COUNTERS];     /* per slot: RAPL_SOURCE_NONE, RAPL_SOURCE_PERF or RAPL_SOURCE_MSR */
+       unsigned long long flags[NUM_RAPL_COUNTERS];    /* presumably RAPL_COUNTER_FLAG_* bits - TODO confirm */
+       double scale[NUM_RAPL_COUNTERS];
+       enum rapl_unit unit[NUM_RAPL_COUNTERS];
+
+       union {         /* currently wraps a single anonymous struct */
+               /* Active when source == RAPL_SOURCE_MSR */
+               struct {
+                       unsigned long long msr[NUM_RAPL_COUNTERS];
+                       unsigned long long msr_mask[NUM_RAPL_COUNTERS];
+                       int msr_shift[NUM_RAPL_COUNTERS];
+               };
+       };
+
+       int fd_perf;    /* a single descriptor for the whole struct, not one per slot */
+};
+
+/* struct rapl_counter_info_t for each RAPL domain */
+struct rapl_counter_info_t *rapl_counter_info_perdomain;
+
+#define RAPL_COUNTER_FLAG_USE_MSR_SUM (1u << 1)
+
+struct rapl_counter_arch_info {
+       int feature_mask;       /* Mask for testing if the counter is supported on host */
+       const char *perf_subsys;        /* NULL in rapl_counter_arch_infos[] entries that name no perf event */
+       const char *perf_name;  /* NULL in exactly the entries where perf_subsys is NULL */
+       unsigned long long msr;
+       unsigned long long msr_mask;
+       int msr_shift;          /* Positive means shift right, negative means shift left */
+       double *platform_rapl_msr_scale;        /* Scale applied to values read by MSR (platform dependent, filled at runtime) */
+       unsigned int rci_index; /* Maps data from perf counters to global variables */
+       unsigned long long bic; /* BIC_* bit(s) listed for this counter */
+       double compat_scale;    /* Some counters require constant scaling to be in the same range as other, similar ones */
+       unsigned long long flags;       /* RAPL_COUNTER_FLAG_* bits, 0 for none */
+};
+
+static const struct rapl_counter_arch_info rapl_counter_arch_infos[] = {       /* eight entries; the first two share one rci_index */
+       {
+        .feature_mask = RAPL_PKG,
+        .perf_subsys = "power",
+        .perf_name = "energy-pkg",
+        .msr = MSR_PKG_ENERGY_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,
+        .bic = BIC_PkgWatt | BIC_Pkg_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_AMD_F17H,
+        .perf_subsys = "power",
+        .perf_name = "energy-pkg",     /* same perf event name as the RAPL_PKG entry */
+        .msr = MSR_PKG_ENERGY_STAT,    /* different MSR from the RAPL_PKG entry */
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_ENERGY_PKG,        /* same slot as the RAPL_PKG entry */
+        .bic = BIC_PkgWatt | BIC_Pkg_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_CORE_ENERGY_STATUS,
+        .perf_subsys = "power",
+        .perf_name = "energy-cores",
+        .msr = MSR_PP0_ENERGY_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_ENERGY_CORES,
+        .bic = BIC_CorWatt | BIC_Cor_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_DRAM,
+        .perf_subsys = "power",
+        .perf_name = "energy-ram",
+        .msr = MSR_DRAM_ENERGY_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_dram_energy_units,    /* the only entry using the DRAM-specific scale */
+        .rci_index = RAPL_RCI_INDEX_DRAM,
+        .bic = BIC_RAMWatt | BIC_RAM_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_GFX,
+        .perf_subsys = "power",
+        .perf_name = "energy-gpu",
+        .msr = MSR_PP1_ENERGY_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_GFX,
+        .bic = BIC_GFXWatt | BIC_GFX_J,
+        .compat_scale = 1.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_PKG_PERF_STATUS,
+        .perf_subsys = NULL,           /* no perf event is named for this counter */
+        .perf_name = NULL,
+        .msr = MSR_PKG_PERF_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_time_units,   /* time units, unlike the energy entries above */
+        .rci_index = RAPL_RCI_INDEX_PKG_PERF_STATUS,
+        .bic = BIC_PKG__,
+        .compat_scale = 100.0,         /* only the two *_PERF_STATUS entries use 100.0 */
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_DRAM_PERF_STATUS,
+        .perf_subsys = NULL,           /* no perf event is named for this counter */
+        .perf_name = NULL,
+        .msr = MSR_DRAM_PERF_STATUS,
+        .msr_mask = 0xFFFFFFFFFFFFFFFF,
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_time_units,
+        .rci_index = RAPL_RCI_INDEX_DRAM_PERF_STATUS,
+        .bic = BIC_RAM__,
+        .compat_scale = 100.0,
+        .flags = RAPL_COUNTER_FLAG_USE_MSR_SUM,
+         },
+       {
+        .feature_mask = RAPL_AMD_F17H, /* same feature bit as the second entry, different counter */
+        .perf_subsys = NULL,           /* no perf event is named for this counter */
+        .perf_name = NULL,
+        .msr = MSR_CORE_ENERGY_STAT,
+        .msr_mask = 0xFFFFFFFF,        /* the only entry with a 32-bit mask */
+        .msr_shift = 0,
+        .platform_rapl_msr_scale = &rapl_energy_units,
+        .rci_index = RAPL_RCI_INDEX_CORE_ENERGY,
+        .bic = BIC_CorWatt | BIC_Cor_J,        /* same BIC bits as the RAPL_CORE_ENERGY_STATUS entry */
+        .compat_scale = 1.0,
+        .flags = 0,                    /* the only entry without RAPL_COUNTER_FLAG_USE_MSR_SUM */
+         },
+};
+
+struct rapl_counter {  /* field type used for the energy/perf-status members of core_data and pkg_data */
+       unsigned long long raw_value;
+       enum rapl_unit unit;    /* RAPL_UNIT_INVALID, RAPL_UNIT_JOULES or RAPL_UNIT_WATTS */
+       double scale;           /* presumably the multiplier that converts raw_value into unit - TODO confirm */
+};
+
 struct thread_data {
        struct timeval tv_begin;
        struct timeval tv_end;
@@ -974,7 +1178,7 @@ struct core_data {
        unsigned long long c7;
        unsigned long long mc6_us;      /* duplicate as per-core for now, even though per module */
        unsigned int core_temp_c;
-       unsigned int core_energy;       /* MSR_CORE_ENERGY_STAT */
+       struct rapl_counter core_energy;        /* MSR_CORE_ENERGY_STAT */
        unsigned int core_id;
        unsigned long long core_throt_cnt;
        unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -989,8 +1193,8 @@ struct pkg_data {
        unsigned long long pc8;
        unsigned long long pc9;
        unsigned long long pc10;
-       unsigned long long cpu_lpi;
-       unsigned long long sys_lpi;
+       long long cpu_lpi;
+       long long sys_lpi;
        unsigned long long pkg_wtd_core_c0;
        unsigned long long pkg_any_core_c0;
        unsigned long long pkg_any_gfxe_c0;
@@ -998,13 +1202,16 @@ struct pkg_data {
        long long gfx_rc6_ms;
        unsigned int gfx_mhz;
        unsigned int gfx_act_mhz;
+       long long sam_mc6_ms;
+       unsigned int sam_mhz;
+       unsigned int sam_act_mhz;
        unsigned int package_id;
-       unsigned long long energy_pkg;  /* MSR_PKG_ENERGY_STATUS */
-       unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
-       unsigned long long energy_cores;        /* MSR_PP0_ENERGY_STATUS */
-       unsigned long long energy_gfx;  /* MSR_PP1_ENERGY_STATUS */
-       unsigned long long rapl_pkg_perf_status;        /* MSR_PKG_PERF_STATUS */
-       unsigned long long rapl_dram_perf_status;       /* MSR_DRAM_PERF_STATUS */
+       struct rapl_counter energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+       struct rapl_counter energy_dram;        /* MSR_DRAM_ENERGY_STATUS */
+       struct rapl_counter energy_cores;       /* MSR_PP0_ENERGY_STATUS */
+       struct rapl_counter energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+       struct rapl_counter rapl_pkg_perf_status;       /* MSR_PKG_PERF_STATUS */
+       struct rapl_counter rapl_dram_perf_status;      /* MSR_DRAM_PERF_STATUS */
        unsigned int pkg_temp_c;
        unsigned int uncore_mhz;
        unsigned long long counter[MAX_ADDED_COUNTERS];
@@ -1150,6 +1357,38 @@ struct sys_counters {
        struct msr_counter *pp;
 } sys;
 
+void free_sys_counters(void)
+{
+       struct msr_counter *p = sys.tp, *pnext = NULL;
+
+       while (p) {
+               pnext = p->next;
+               free(p);
+               p = pnext;
+       }
+
+       p = sys.cp, pnext = NULL;
+       while (p) {
+               pnext = p->next;
+               free(p);
+               p = pnext;
+       }
+
+       p = sys.pp, pnext = NULL;
+       while (p) {
+               pnext = p->next;
+               free(p);
+               p = pnext;
+       }
+
+       sys.added_thread_counters = 0;
+       sys.added_core_counters = 0;
+       sys.added_package_counters = 0;
+       sys.tp = NULL;
+       sys.cp = NULL;
+       sys.pp = NULL;
+}
+
 struct system_summary {
        struct thread_data threads;
        struct core_data cores;
@@ -1280,34 +1519,60 @@ int get_msr_fd(int cpu)
        sprintf(pathname, "/dev/cpu/%d/msr", cpu);
        fd = open(pathname, O_RDONLY);
        if (fd < 0)
-               err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+               err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, "
+                   "or run with --no-msr, or run as root", pathname);
 
        fd_percpu[cpu] = fd;
 
        return fd;
 }
 
+static void bic_disable_msr_access(void)
+{
+       const unsigned long bic_msrs =
+           BIC_SMI |
+           BIC_CPU_c1 |
+           BIC_CPU_c3 |
+           BIC_CPU_c6 |
+           BIC_CPU_c7 |
+           BIC_Mod_c6 |
+           BIC_CoreTmp |
+           BIC_Totl_c0 |
+           BIC_Any_c0 |
+           BIC_GFX_c0 |
+           BIC_CPUGFX |
+           BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_PkgTmp;
+
+       bic_enabled &= ~bic_msrs;
+
+       free_sys_counters();
+}
+
 static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
 {
+       assert(!no_perf);
+
        return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
 }
 
-static int perf_instr_count_open(int cpu_num)
+static long open_perf_counter(int cpu, unsigned int type, unsigned int config, int group_fd, __u64 read_format)
 {
-       struct perf_event_attr pea;
-       int fd;
+       struct perf_event_attr attr;
+       const pid_t pid = -1;
+       const unsigned long flags = 0;
 
-       memset(&pea, 0, sizeof(struct perf_event_attr));
-       pea.type = PERF_TYPE_HARDWARE;
-       pea.size = sizeof(struct perf_event_attr);
-       pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+       assert(!no_perf);
 
-       /* counter for cpu_num, including user + kernel and all processes */
-       fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
-       if (fd == -1) {
-               warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
-               BIC_NOT_PRESENT(BIC_IPC);
-       }
+       memset(&attr, 0, sizeof(struct perf_event_attr));
+
+       attr.type = type;
+       attr.size = sizeof(struct perf_event_attr);
+       attr.config = config;
+       attr.disabled = 0;
+       attr.sample_type = PERF_SAMPLE_IDENTIFIER;
+       attr.read_format = read_format;
+
+       const int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
 
        return fd;
 }
@@ -1317,7 +1582,7 @@ int get_instr_count_fd(int cpu)
        if (fd_instr_count_percpu[cpu])
                return fd_instr_count_percpu[cpu];
 
-       fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
+       fd_instr_count_percpu[cpu] = open_perf_counter(cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
 
        return fd_instr_count_percpu[cpu];
 }
@@ -1326,6 +1591,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 {
        ssize_t retval;
 
+       assert(!no_msr);
+
        retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
 
        if (retval != sizeof *msr)
@@ -1334,6 +1601,21 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
        return 0;
 }
 
+int probe_msr(int cpu, off_t offset)
+{
+       ssize_t retval;
+       unsigned long long dummy;
+
+       assert(!no_msr);
+
+       retval = pread(get_msr_fd(cpu), &dummy, sizeof(dummy), offset);
+
+       if (retval != sizeof(dummy))
+               return 1;
+
+       return 0;
+}
+
 #define MAX_DEFERRED 16
 char *deferred_add_names[MAX_DEFERRED];
 char *deferred_skip_names[MAX_DEFERRED];
@@ -1369,6 +1651,8 @@ void help(void)
                "               Override default 5-second measurement interval\n"
                "  -J, --Joules displays energy in Joules instead of Watts\n"
                "  -l, --list   list column headers only\n"
+               "  -M, --no-msr Disable all uses of the MSR driver\n"
+               "  -P, --no-perf Disable all uses of the perf API\n"
                "  -n, --num_iterations num\n"
                "               number of the measurement iterations\n"
                "  -N, --header_iterations num\n"
@@ -1573,6 +1857,15 @@ void print_header(char *delim)
        if (DO_BIC(BIC_GFXACTMHz))
                outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
 
+       if (DO_BIC(BIC_SAM_mc6))
+               outp += sprintf(outp, "%sSAM%%mc6", (printed++ ? delim : ""));
+
+       if (DO_BIC(BIC_SAMMHz))
+               outp += sprintf(outp, "%sSAMMHz", (printed++ ? delim : ""));
+
+       if (DO_BIC(BIC_SAMACTMHz))
+               outp += sprintf(outp, "%sSAMAMHz", (printed++ ? delim : ""));
+
        if (DO_BIC(BIC_Totl_c0))
                outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
        if (DO_BIC(BIC_Any_c0))
@@ -1671,26 +1964,35 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
                        outp += sprintf(outp, "SMI: %d\n", t->smi_count);
 
                for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
-                       outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
+                       outp +=
+                           sprintf(outp, "tADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+                                   t->counter[i], mp->path);
                }
        }
 
-       if (c) {
+       if (c && is_cpu_first_thread_in_core(t, c, p)) {
                outp += sprintf(outp, "core: %d\n", c->core_id);
                outp += sprintf(outp, "c3: %016llX\n", c->c3);
                outp += sprintf(outp, "c6: %016llX\n", c->c6);
                outp += sprintf(outp, "c7: %016llX\n", c->c7);
                outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
                outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt);
-               outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
+
+               const unsigned long long energy_value = c->core_energy.raw_value * c->core_energy.scale;
+               const double energy_scale = c->core_energy.scale;
+
+               if (c->core_energy.unit == RAPL_UNIT_JOULES)
+                       outp += sprintf(outp, "Joules: %0llX (scale: %lf)\n", energy_value, energy_scale);
 
                for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
-                       outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
+                       outp +=
+                           sprintf(outp, "cADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+                                   c->counter[i], mp->path);
                }
                outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
        }
 
-       if (p) {
+       if (p && is_cpu_first_core_in_package(t, c, p)) {
                outp += sprintf(outp, "package: %d\n", p->package_id);
 
                outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
@@ -1710,16 +2012,18 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
                outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
                outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
                outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
-               outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
-               outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
-               outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
-               outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
-               outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
-               outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
+               outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg.raw_value);
+               outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores.raw_value);
+               outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx.raw_value);
+               outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram.raw_value);
+               outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status.raw_value);
+               outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status.raw_value);
                outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
 
                for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
-                       outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
+                       outp +=
+                           sprintf(outp, "pADDED [%d] %8s msr0x%x: %08llX %s\n", i, mp->name, mp->msr_num,
+                                   p->counter[i], mp->path);
                }
        }
 
@@ -1728,6 +2032,23 @@ int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p
        return 0;
 }
 
+double rapl_counter_get_value(const struct rapl_counter *c, enum rapl_unit desired_unit, double interval)
+{
+       assert(desired_unit != RAPL_UNIT_INVALID);
+
+       /*
+        * For now we don't expect anything other than joules,
+        * so just simplify the logic.
+        */
+       assert(c->unit == RAPL_UNIT_JOULES);
+
+       const double scaled = c->raw_value * c->scale;
+
+       if (desired_unit == RAPL_UNIT_WATTS)
+               return scaled / interval;
+       return scaled;
+}
+
 /*
  * column formatting convention & formats
  */
@@ -1921,9 +2242,11 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
 
        if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl)
                outp +=
-                   sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
+                   sprintf(outp, fmt8, (printed++ ? delim : ""),
+                           rapl_counter_get_value(&c->core_energy, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_Cor_J) && platform->has_per_core_rapl)
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&c->core_energy, RAPL_UNIT_JOULES, interval_float));
 
        /* print per-package data only for 1st core in package */
        if (!is_cpu_first_core_in_package(t, c, p))
@@ -1951,6 +2274,24 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        if (DO_BIC(BIC_GFXACTMHz))
                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
 
+       /* SAMmc6 */
+       if (DO_BIC(BIC_SAM_mc6)) {
+               if (p->sam_mc6_ms == -1) {      /* detect GFX counter reset */
+                       outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
+               } else {
+                       outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+                                       p->sam_mc6_ms / 10.0 / interval_float);
+               }
+       }
+
+       /* SAMMHz */
+       if (DO_BIC(BIC_SAMMHz))
+               outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_mhz);
+
+       /* SAMACTMHz */
+       if (DO_BIC(BIC_SAMACTMHz))
+               outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->sam_act_mhz);
+
        /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
        if (DO_BIC(BIC_Totl_c0))
                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
@@ -1976,43 +2317,59 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        if (DO_BIC(BIC_Pkgpc10))
                outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
 
-       if (DO_BIC(BIC_CPU_LPI))
-               outp +=
-                   sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
-       if (DO_BIC(BIC_SYS_LPI))
-               outp +=
-                   sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+       if (DO_BIC(BIC_CPU_LPI)) {
+               if (p->cpu_lpi >= 0)
+                       outp +=
+                           sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+                                   100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+               else
+                       outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+       }
+       if (DO_BIC(BIC_SYS_LPI)) {
+               if (p->sys_lpi >= 0)
+                       outp +=
+                           sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+                                   100.0 * p->sys_lpi / 1000000.0 / interval_float);
+               else
+                       outp += sprintf(outp, "%s(neg)", (printed++ ? delim : ""));
+       }
 
        if (DO_BIC(BIC_PkgWatt))
                outp +=
-                   sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
-
+                   sprintf(outp, fmt8, (printed++ ? delim : ""),
+                           rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_CorWatt) && !platform->has_per_core_rapl)
                outp +=
-                   sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+                   sprintf(outp, fmt8, (printed++ ? delim : ""),
+                           rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_GFXWatt))
                outp +=
-                   sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+                   sprintf(outp, fmt8, (printed++ ? delim : ""),
+                           rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_RAMWatt))
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""),
-                           p->energy_dram * rapl_dram_energy_units / interval_float);
+                           rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_Pkg_J))
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&p->energy_pkg, RAPL_UNIT_JOULES, interval_float));
        if (DO_BIC(BIC_Cor_J) && !platform->has_per_core_rapl)
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&p->energy_cores, RAPL_UNIT_JOULES, interval_float));
        if (DO_BIC(BIC_GFX_J))
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&p->energy_gfx, RAPL_UNIT_JOULES, interval_float));
        if (DO_BIC(BIC_RAM_J))
-               outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
+               outp += sprintf(outp, fmt8, (printed++ ? delim : ""),
+                               rapl_counter_get_value(&p->energy_dram, RAPL_UNIT_JOULES, interval_float));
        if (DO_BIC(BIC_PKG__))
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""),
-                           100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+                           rapl_counter_get_value(&p->rapl_pkg_perf_status, RAPL_UNIT_WATTS, interval_float));
        if (DO_BIC(BIC_RAM__))
                outp +=
                    sprintf(outp, fmt8, (printed++ ? delim : ""),
-                           100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+                           rapl_counter_get_value(&p->rapl_dram_perf_status, RAPL_UNIT_WATTS, interval_float));
        /* UncMHz */
        if (DO_BIC(BIC_UNCORE_MHZ))
                outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz);
@@ -2121,12 +2478,22 @@ int delta_package(struct pkg_data *new, struct pkg_data *old)
        old->gfx_mhz = new->gfx_mhz;
        old->gfx_act_mhz = new->gfx_act_mhz;
 
-       old->energy_pkg = new->energy_pkg - old->energy_pkg;
-       old->energy_cores = new->energy_cores - old->energy_cores;
-       old->energy_gfx = new->energy_gfx - old->energy_gfx;
-       old->energy_dram = new->energy_dram - old->energy_dram;
-       old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
-       old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
+       /* flag an error when mc6 counter resets/wraps */
+       if (old->sam_mc6_ms > new->sam_mc6_ms)
+               old->sam_mc6_ms = -1;
+       else
+               old->sam_mc6_ms = new->sam_mc6_ms - old->sam_mc6_ms;
+
+       old->sam_mhz = new->sam_mhz;
+       old->sam_act_mhz = new->sam_act_mhz;
+
+       old->energy_pkg.raw_value = new->energy_pkg.raw_value - old->energy_pkg.raw_value;
+       old->energy_cores.raw_value = new->energy_cores.raw_value - old->energy_cores.raw_value;
+       old->energy_gfx.raw_value = new->energy_gfx.raw_value - old->energy_gfx.raw_value;
+       old->energy_dram.raw_value = new->energy_dram.raw_value - old->energy_dram.raw_value;
+       old->rapl_pkg_perf_status.raw_value = new->rapl_pkg_perf_status.raw_value - old->rapl_pkg_perf_status.raw_value;
+       old->rapl_dram_perf_status.raw_value =
+           new->rapl_dram_perf_status.raw_value - old->rapl_dram_perf_status.raw_value;
 
        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -2150,7 +2517,7 @@ void delta_core(struct core_data *new, struct core_data *old)
        old->core_throt_cnt = new->core_throt_cnt;
        old->mc6_us = new->mc6_us - old->mc6_us;
 
-       DELTA_WRAP32(new->core_energy, old->core_energy);
+       DELTA_WRAP32(new->core_energy.raw_value, old->core_energy.raw_value);
 
        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -2277,6 +2644,13 @@ int delta_cpu(struct thread_data *t, struct core_data *c,
        return retval;
 }
 
+void rapl_counter_clear(struct rapl_counter *c)
+{
+       c->raw_value = 0;
+       c->scale = 0.0;
+       c->unit = RAPL_UNIT_INVALID;
+}
+
 void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
        int i;
@@ -2304,7 +2678,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        c->c7 = 0;
        c->mc6_us = 0;
        c->core_temp_c = 0;
-       c->core_energy = 0;
+       rapl_counter_clear(&c->core_energy);
        c->core_throt_cnt = 0;
 
        p->pkg_wtd_core_c0 = 0;
@@ -2325,18 +2699,21 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
        p->cpu_lpi = 0;
        p->sys_lpi = 0;
 
-       p->energy_pkg = 0;
-       p->energy_dram = 0;
-       p->energy_cores = 0;
-       p->energy_gfx = 0;
-       p->rapl_pkg_perf_status = 0;
-       p->rapl_dram_perf_status = 0;
+       rapl_counter_clear(&p->energy_pkg);
+       rapl_counter_clear(&p->energy_dram);
+       rapl_counter_clear(&p->energy_cores);
+       rapl_counter_clear(&p->energy_gfx);
+       rapl_counter_clear(&p->rapl_pkg_perf_status);
+       rapl_counter_clear(&p->rapl_dram_perf_status);
        p->pkg_temp_c = 0;
 
        p->gfx_rc6_ms = 0;
        p->uncore_mhz = 0;
        p->gfx_mhz = 0;
        p->gfx_act_mhz = 0;
+       p->sam_mc6_ms = 0;
+       p->sam_mhz = 0;
+       p->sam_act_mhz = 0;
        for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
                t->counter[i] = 0;
 
@@ -2347,6 +2724,20 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
                p->counter[i] = 0;
 }
 
+void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src)
+{
+       /* Copy unit and scale from src if dst is not initialized */
+       if (dst->unit == RAPL_UNIT_INVALID) {
+               dst->unit = src->unit;
+               dst->scale = src->scale;
+       }
+
+       assert(dst->unit == src->unit);
+       assert(dst->scale == src->scale);
+
+       dst->raw_value += src->raw_value;
+}
+
 int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 {
        int i;
@@ -2393,7 +2784,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
        average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt);
 
-       average.cores.core_energy += c->core_energy;
+       rapl_counter_accumulate(&average.cores.core_energy, &c->core_energy);
 
        for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
                if (mp->format == FORMAT_RAW)
@@ -2428,25 +2819,29 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        average.packages.cpu_lpi = p->cpu_lpi;
        average.packages.sys_lpi = p->sys_lpi;
 
-       average.packages.energy_pkg += p->energy_pkg;
-       average.packages.energy_dram += p->energy_dram;
-       average.packages.energy_cores += p->energy_cores;
-       average.packages.energy_gfx += p->energy_gfx;
+       rapl_counter_accumulate(&average.packages.energy_pkg, &p->energy_pkg);
+       rapl_counter_accumulate(&average.packages.energy_dram, &p->energy_dram);
+       rapl_counter_accumulate(&average.packages.energy_cores, &p->energy_cores);
+       rapl_counter_accumulate(&average.packages.energy_gfx, &p->energy_gfx);
 
        average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
        average.packages.uncore_mhz = p->uncore_mhz;
        average.packages.gfx_mhz = p->gfx_mhz;
        average.packages.gfx_act_mhz = p->gfx_act_mhz;
+       average.packages.sam_mc6_ms = p->sam_mc6_ms;
+       average.packages.sam_mhz = p->sam_mhz;
+       average.packages.sam_act_mhz = p->sam_act_mhz;
 
        average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
 
-       average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
-       average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+       rapl_counter_accumulate(&average.packages.rapl_pkg_perf_status, &p->rapl_pkg_perf_status);
+       rapl_counter_accumulate(&average.packages.rapl_dram_perf_status, &p->rapl_dram_perf_status);
 
        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
-               if (mp->format == FORMAT_RAW)
-                       continue;
-               average.packages.counter[i] += p->counter[i];
+               if ((mp->format == FORMAT_RAW) && (topo.num_packages == 0))
+                       average.packages.counter[i] = p->counter[i];
+               else
+                       average.packages.counter[i] += p->counter[i];
        }
        return 0;
 }
@@ -2578,6 +2973,7 @@ unsigned long long snapshot_sysfs_counter(char *path)
 int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
 {
        if (mp->msr_num != 0) {
+               assert(!no_msr);
                if (get_msr(cpu, mp->msr_num, counterp))
                        return -1;
        } else {
@@ -2599,7 +2995,7 @@ unsigned long long get_uncore_mhz(int package, int die)
 {
        char path[128];
 
-       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package,
+       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d/current_freq_khz", package,
                die);
 
        return (snapshot_sysfs_counter(path) / 1000);
@@ -2627,6 +3023,9 @@ int get_epb(int cpu)
        return epb;
 
 msr_fallback:
+       if (no_msr)
+               return -1;
+
        get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
 
        return msr & 0xf;
@@ -2700,187 +3099,495 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt)
        return 0;
 }
 
-/*
- * get_counters(...)
- * migrate to cpu
- * acquire and record local counters for that cpu
- */
-int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+/*
+ * Pair of perf event descriptors used to read APERF and MPERF together.
+ * The two events are opened as one group, with aperf as the leader.
+ */
+struct amperf_group_fd {
+       int aperf;              /* Also the group descriptor */
+       int mperf;
+};
+
+/*
+ * Read the text file at path and parse it with sscanf() using parse_format,
+ * which is expected to contain exactly one conversion; the converted value
+ * is stored through value_ptr.
+ *
+ * At most 63 bytes of the file are examined.
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or read, or if its
+ * contents do not match parse_format.
+ */
+static int read_perf_counter_info(const char *const path, const char *const parse_format, void *value_ptr)
 {
-       int cpu = t->cpu_id;
-       unsigned long long msr;
-       int aperf_mperf_retry_count = 0;
-       struct msr_counter *mp;
-       int i;
+       int fdmt;
+       int bytes_read;
+       char buf[64];
+       int ret = -1;
 
-       if (cpu_migrate(cpu)) {
-               fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
-               return -1;
+       fdmt = open(path, O_RDONLY, 0);
+       if (fdmt == -1) {
+               if (debug)
+                       fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+               /* Nothing was opened, so there is no descriptor to close. */
+               return -1;
        }
 
-       gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+       bytes_read = read(fdmt, buf, sizeof(buf) - 1);
+       if (bytes_read <= 0 || bytes_read >= (int)sizeof(buf)) {
+               if (debug)
+                       fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+               ret = -1;
+               goto cleanup_and_exit;
+       }
 
-       if (first_counter_read)
-               get_apic_id(t);
-retry:
-       t->tsc = rdtsc();       /* we are running on local CPU of interest */
+       buf[bytes_read] = '\0';
 
-       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
-           || soft_c1_residency_display(BIC_Avg_MHz)) {
-               unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+       if (sscanf(buf, parse_format, value_ptr) != 1) {
+               if (debug)
+                       fprintf(stderr, "Failed to parse perf counter info %s\n", path);
+               ret = -1;
+               goto cleanup_and_exit;
+       }
 
-               /*
-                * The TSC, APERF and MPERF must be read together for
-                * APERF/MPERF and MPERF/TSC to give accurate results.
-                *
-                * Unfortunately, APERF and MPERF are read by
-                * individual system call, so delays may occur
-                * between them.  If the time to read them
-                * varies by a large amount, we re-read them.
-                */
+       ret = 0;
 
-               /*
-                * This initial dummy APERF read has been seen to
-                * reduce jitter in the subsequent reads.
-                */
+cleanup_and_exit:
+       close(fdmt);
+       return ret;
+}
 
-               if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
-                       return -3;
+/*
+ * Wrapper around read_perf_counter_info() for a single unsigned int value.
+ *
+ * Returns the parsed value, or (unsigned int)-1 when reading or parsing
+ * the file fails.
+ */
+static unsigned int read_perf_counter_info_n(const char *const path, const char *const parse_format)
+{
+       unsigned int v;
+       int status;
 
-               t->tsc = rdtsc();       /* re-read close to APERF */
+       status = read_perf_counter_info(path, parse_format, &v);
+       if (status)
+               v = -1;
 
-               tsc_before = t->tsc;
+       return v;
+}
 
-               if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
-                       return -3;
+/*
+ * Read the decimal value in the "type" file of the "msr" perf event source.
+ * Returns whatever read_perf_counter_info_n() returns for that file.
+ */
+static unsigned int read_msr_type(void)
+{
+       const char *const path = "/sys/bus/event_source/devices/msr/type";
+       const char *const format = "%u";
 
-               tsc_between = rdtsc();
+       return read_perf_counter_info_n(path, format);
+}
 
-               if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
-                       return -4;
+/*
+ * Read the hexadecimal "event=" value of the aperf event exposed by the
+ * "msr" perf event source.
+ * Returns whatever read_perf_counter_info_n() returns for that file.
+ */
+static unsigned int read_aperf_config(void)
+{
+       const char *const path = "/sys/bus/event_source/devices/msr/events/aperf";
+       const char *const format = "event=%x";
 
-               tsc_after = rdtsc();
+       return read_perf_counter_info_n(path, format);
+}
 
-               aperf_time = tsc_between - tsc_before;
-               mperf_time = tsc_after - tsc_between;
+/*
+ * Read the hexadecimal "event=" value of the mperf event exposed by the
+ * "msr" perf event source.
+ * Returns whatever read_perf_counter_info_n() returns for that file.
+ */
+static unsigned int read_mperf_config(void)
+{
+       const char *const path = "/sys/bus/event_source/devices/msr/events/mperf";
+       const char *const format = "event=%x";
 
-               /*
-                * If the system call latency to read APERF and MPERF
-                * differ by more than 2x, then try again.
-                */
-               if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
-                       aperf_mperf_retry_count++;
-                       if (aperf_mperf_retry_count < 5)
-                               goto retry;
-                       else
-                               warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
-               }
-               aperf_mperf_retry_count = 0;
+       return read_perf_counter_info_n(path, format);
+}
 
-               t->aperf = t->aperf * aperf_mperf_multiplier;
-               t->mperf = t->mperf * aperf_mperf_multiplier;
-       }
+/*
+ * Read the decimal value in the "type" file of the perf event source
+ * named subsys.
+ * Returns whatever read_perf_counter_info_n() returns for that file.
+ */
+static unsigned int read_perf_type(const char *subsys)
+{
+       const char *const path_format = "/sys/bus/event_source/devices/%s/type";
+       const char *const format = "%u";
+       char path[128];
 
-       if (DO_BIC(BIC_IPC))
-               if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
-                       return -4;
+       snprintf(path, sizeof(path), path_format, subsys);
 
-       if (DO_BIC(BIC_IRQ))
-               t->irq_count = irqs_per_cpu[cpu];
-       if (DO_BIC(BIC_SMI)) {
-               if (get_msr(cpu, MSR_SMI_COUNT, &msr))
-                       return -5;
-               t->smi_count = msr & 0xFFFFFFFF;
-       }
-       if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
-               if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
-                       return -6;
-       }
+       return read_perf_counter_info_n(path, format);
+}
 
-       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
-               if (get_mp(cpu, mp, &t->counter[i]))
-                       return -10;
-       }
+/*
+ * Read the hexadecimal "event=" value of event_name under the perf event
+ * source named subsys.
+ * Returns whatever read_perf_counter_info_n() returns for that file.
+ */
+static unsigned int read_rapl_config(const char *subsys, const char *event_name)
+{
+       const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s";
+       const char *const format = "event=%x";
+       char path[128];
 
-       /* collect core counters only for 1st thread in core */
-       if (!is_cpu_first_thread_in_core(t, c, p))
-               goto done;
+       snprintf(path, sizeof(path), path_format, subsys, event_name);
 
-       if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
-               if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
-                       return -6;
-       }
+       return read_perf_counter_info_n(path, format);
+}
 
-       if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
-               if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
-                       return -7;
-       } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
-               if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
-                       return -7;
-       }
+/*
+ * Read the "<event_name>.unit" file of a perf event under subsys.
+ *
+ * Returns RAPL_UNIT_JOULES when the file contains "Joules", and
+ * RAPL_UNIT_INVALID otherwise, including when the file cannot be read.
+ */
+static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name)
+{
+       const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.unit";
+       /* Bound the conversion so it cannot write past unit_buffer[16]. */
+       const char *const format = "%15s";
+       char path[128];
+       char unit_buffer[16];
 
-       if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
-               if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
-                       return -8;
-               else if (t->is_atom) {
-                       /*
-                        * For Atom CPUs that has core cstate deeper than c6,
-                        * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
-                        * Minus CC7 (and deeper cstates) residency to get
-                        * accturate cc6 residency.
-                        */
-                       c->c6 -= c->c7;
-               }
-       }
+       snprintf(path, sizeof(path), path_format, subsys, event_name);
 
-       if (DO_BIC(BIC_Mod_c6))
-               if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
-                       return -8;
+       /* On failure unit_buffer is left unset, so it must not be compared. */
+       if (read_perf_counter_info(path, format, &unit_buffer))
+               return RAPL_UNIT_INVALID;
+
+       if (strcmp("Joules", unit_buffer) == 0)
+               return RAPL_UNIT_JOULES;
 
-       if (DO_BIC(BIC_CoreTmp)) {
-               if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
-                       return -9;
-               c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
-       }
+       return RAPL_UNIT_INVALID;
+}
 
-       if (DO_BIC(BIC_CORE_THROT_CNT))
-               get_core_throt_cnt(cpu, &c->core_throt_cnt);
+/*
+ * Read the "<event_name>.scale" file of a perf event under subsys and
+ * parse it as a double.
+ *
+ * Returns the parsed scale, or 0.0 when the file cannot be read or parsed.
+ */
+static double read_perf_rapl_scale(const char *subsys, const char *event_name)
+{
+       const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale";
+       const char *const format = "%lf";
+       char path[128];
+       double scale;
 
-       if (platform->rapl_msrs & RAPL_AMD_F17H) {
-               if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr))
-                       return -14;
-               c->core_energy = msr & 0xFFFFFFFF;
-       }
+       snprintf(path, sizeof(path), path_format, subsys, event_name);
 
-       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
-               if (get_mp(cpu, mp, &c->counter[i]))
-                       return -10;
-       }
+       if (read_perf_counter_info(path, format, &scale))
+               return 0.0;
 
-       /* collect package counters only for 1st core in package */
-       if (!is_cpu_first_core_in_package(t, c, p))
-               goto done;
+       return scale;
+}
 
-       if (DO_BIC(BIC_Totl_c0)) {
-               if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
-                       return -10;
-       }
-       if (DO_BIC(BIC_Any_c0)) {
-               if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
-                       return -11;
-       }
-       if (DO_BIC(BIC_GFX_c0)) {
-               if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
-                       return -12;
-       }
-       if (DO_BIC(BIC_CPUGFX)) {
-               if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
-                       return -13;
-       }
-       if (DO_BIC(BIC_Pkgpc3))
-               if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
-                       return -9;
-       if (DO_BIC(BIC_Pkgpc6)) {
+/*
+ * Open the APERF and MPERF perf events for one cpu as a single group.
+ * The perf type and both event configs are read from sysfs first; APERF is
+ * opened before MPERF and its descriptor is passed as MPERF's group fd.
+ */
+static struct amperf_group_fd open_amperf_fd(int cpu)
+{
+       struct amperf_group_fd group = {.aperf = -1, .mperf = -1 };
+       const unsigned int type = read_msr_type();
+       const unsigned int cfg_aperf = read_aperf_config();
+       const unsigned int cfg_mperf = read_mperf_config();
+
+       group.aperf = open_perf_counter(cpu, type, cfg_aperf, -1, PERF_FORMAT_GROUP);
+       group.mperf = open_perf_counter(cpu, type, cfg_mperf, group.aperf, PERF_FORMAT_GROUP);
+
+       return group;
+}
+
+/*
+ * Return the APERF (group) descriptor for cpu, opening the APERF/MPERF
+ * pair on first use.  A zero aperf slot is treated as "not opened yet".
+ */
+static int get_amperf_fd(int cpu)
+{
+       struct amperf_group_fd *slot;
+
+       assert(fd_amperf_percpu);
+
+       slot = &fd_amperf_percpu[cpu];
+       if (!slot->aperf)
+               *slot = open_amperf_fd(cpu);
+
+       return slot->aperf;
+}
+
+/*
+ * Read APERF, MPERF and TSC using the perf API.
+ *
+ * APERF and MPERF come from one group read on the cpu's APERF descriptor
+ * and are scaled by aperf_mperf_multiplier; TSC is read with rdtsc().
+ *
+ * Returns 0 on success, -2 if the group read does not return exactly the
+ * expected number of bytes.
+ */
+static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu)
+{
+       /*
+        * Every value returned by a PERF_FORMAT_GROUP read is 64 bits wide,
+        * so use unsigned long long: unsigned long is only 32 bits on
+        * ILP32 builds and would mis-size the buffer.
+        */
+       union {
+               struct {
+                       unsigned long long nr_entries;
+                       unsigned long long aperf;
+                       unsigned long long mperf;
+               };
+
+               unsigned long long as_array[3];
+       } cnt;
+
+       const int fd_amperf = get_amperf_fd(cpu);
+
+       /*
+        * Read the TSC with rdtsc, because we want the absolute value and not
+        * the offset from the start of the counter.
+        */
+       t->tsc = rdtsc();
+
+       /* ssize_t keeps a -1 error return distinct from any byte count. */
+       const ssize_t n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array));
+
+       if (n != (ssize_t)sizeof(cnt.as_array))
+               return -2;
+
+       t->aperf = cnt.aperf * aperf_mperf_multiplier;
+       t->mperf = cnt.mperf * aperf_mperf_multiplier;
+
+       return 0;
+}
+
+/*
+ * Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction.
+ *
+ * The reads are retried (at most 5 attempts in total) while the time taken
+ * by the APERF read and the MPERF read differ by more than 2x; after the
+ * last attempt a warning is printed and the values are used as they are.
+ *
+ * Returns 0 on success, -3 if an APERF read fails, -4 if the MPERF read
+ * fails.
+ */
+static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu)
+{
+       unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+       int aperf_mperf_retry_count = 0;
+
+       /*
+        * The TSC, APERF and MPERF must be read together for
+        * APERF/MPERF and MPERF/TSC to give accurate results.
+        *
+        * Unfortunately, APERF and MPERF are read by
+        * individual system call, so delays may occur
+        * between them.  If the time to read them
+        * varies by a large amount, we re-read them.
+        */
+
+       /*
+        * This initial dummy APERF read has been seen to
+        * reduce jitter in the subsequent reads.
+        */
+
+       if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+               return -3;
+
+retry:
+       t->tsc = rdtsc();       /* re-read close to APERF */
+
+       tsc_before = t->tsc;
+
+       if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+               return -3;
+
+       tsc_between = rdtsc();
+
+       if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
+               return -4;
+
+       tsc_after = rdtsc();
+
+       aperf_time = tsc_between - tsc_before;
+       mperf_time = tsc_after - tsc_between;
+
+       /*
+        * If the system call latency to read APERF and MPERF
+        * differ by more than 2x, then try again.
+        */
+       if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+               aperf_mperf_retry_count++;
+               if (aperf_mperf_retry_count < 5)
+                       goto retry;
+               else
+                       /* both times are unsigned long long, hence %llu */
+                       warnx("cpu%d jitter %llu %llu", cpu, aperf_time, mperf_time);
+       }
+
+       t->aperf = t->aperf * aperf_mperf_multiplier;
+       t->mperf = t->mperf * aperf_mperf_multiplier;
+
+       return 0;
+}
+
+/* Count the counters in rci whose source is RAPL_SOURCE_PERF. */
+size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci)
+{
+       size_t n_perf = 0;
+       int idx = 0;
+
+       while (idx < NUM_RAPL_COUNTERS) {
+               if (rci->source[idx] == RAPL_SOURCE_PERF)
+                       n_perf++;
+               idx++;
+       }
+
+       return n_perf;
+}
+
+/* Copy the raw value, unit and scale of counter idx from rci into rc. */
+void write_rapl_counter(struct rapl_counter *rc, struct rapl_counter_info_t *rci, unsigned int idx)
+{
+       rc->scale = rci->scale[idx];
+       rc->unit = rci->unit[idx];
+       rc->raw_value = rci->data[idx];
+}
+
+/*
+ * Read every RAPL counter configured for the given domain and copy the
+ * results into the core (c) and package (p) data.
+ *
+ * Perf-sourced counters are fetched with one group read, whose first
+ * element is skipped (values are consumed from index 1).  MSR-sourced
+ * counters are read one at a time, then masked and shifted as configured.
+ *
+ * Returns 0 on success, or (-13 - i) when the MSR read for counter i
+ * fails.  A perf read of unexpected size terminates the program via err().
+ */
+int get_rapl_counters(int cpu, int domain, struct core_data *c, struct pkg_data *p)
+{
+       unsigned long long perf_data[NUM_RAPL_COUNTERS + 1];
+       struct rapl_counter_info_t *rci;
+
+       if (debug)
+               fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain);
+
+       /* Check the table before indexing into it, not after. */
+       assert(rapl_counter_info_perdomain);
+       rci = &rapl_counter_info_perdomain[domain];
+
+       /*
+        * If we have any perf counters to read, read them all now, in bulk
+        */
+       if (rci->fd_perf != -1) {
+               size_t num_perf_counters = rapl_counter_info_count_perf(rci);
+               const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long);
+               const ssize_t actual_read_size = read(rci->fd_perf, &perf_data[0], sizeof(perf_data));
+
+               /* Both sizes are ssize_t (read() may return -1), hence %zd. */
+               if (actual_read_size != expected_read_size)
+                       err(-1, "%s: failed to read perf_data (%zd %zd)", __func__, expected_read_size,
+                           actual_read_size);
+       }
+
+       for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) {
+               switch (rci->source[i]) {
+               case RAPL_SOURCE_NONE:
+                       break;
+
+               case RAPL_SOURCE_PERF:
+                       assert(pi < ARRAY_SIZE(perf_data));
+                       assert(rci->fd_perf != -1);
+
+                       if (debug)
+                               fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n",
+                                       i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]);
+
+                       rci->data[i] = perf_data[pi];
+
+                       ++pi;
+                       break;
+
+               case RAPL_SOURCE_MSR:
+                       if (debug)
+                               fprintf(stderr, "Reading rapl counter via msr at %u\n", i);
+
+                       assert(!no_msr);
+                       if (rci->flags[i] & RAPL_COUNTER_FLAG_USE_MSR_SUM) {
+                               if (get_msr_sum(cpu, rci->msr[i], &rci->data[i]))
+                                       return -13 - i;
+                       } else {
+                               if (get_msr(cpu, rci->msr[i], &rci->data[i]))
+                                       return -13 - i;
+                       }
+
+                       /* A non-negative shift moves right, a negative one left. */
+                       rci->data[i] &= rci->msr_mask[i];
+                       if (rci->msr_shift[i] >= 0)
+                               rci->data[i] >>= abs(rci->msr_shift[i]);
+                       else
+                               rci->data[i] <<= abs(rci->msr_shift[i]);
+
+                       break;
+               }
+       }
+
+       /* C11 requires a message; the list below must cover every counter. */
+       _Static_assert(NUM_RAPL_COUNTERS == 7, "update the write_rapl_counter() calls below");
+       write_rapl_counter(&p->energy_pkg, rci, RAPL_RCI_INDEX_ENERGY_PKG);
+       write_rapl_counter(&p->energy_cores, rci, RAPL_RCI_INDEX_ENERGY_CORES);
+       write_rapl_counter(&p->energy_dram, rci, RAPL_RCI_INDEX_DRAM);
+       write_rapl_counter(&p->energy_gfx, rci, RAPL_RCI_INDEX_GFX);
+       write_rapl_counter(&p->rapl_pkg_perf_status, rci, RAPL_RCI_INDEX_PKG_PERF_STATUS);
+       write_rapl_counter(&p->rapl_dram_perf_status, rci, RAPL_RCI_INDEX_DRAM_PERF_STATUS);
+       write_rapl_counter(&c->core_energy, rci, RAPL_RCI_INDEX_CORE_ENERGY);
+
+       return 0;
+}
+
+/*
+ * get_counters(...)
+ * migrate to cpu
+ * acquire and record local counters for that cpu
+ */
+int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+       int cpu = t->cpu_id;
+       unsigned long long msr;
+       struct msr_counter *mp;
+       int i;
+       int status;
+
+       if (cpu_migrate(cpu)) {
+               fprintf(outf, "%s: Could not migrate to CPU %d\n", __func__, cpu);
+               return -1;
+       }
+
+       gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+
+       if (first_counter_read)
+               get_apic_id(t);
+
+       t->tsc = rdtsc();       /* we are running on local CPU of interest */
+
+       if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC)
+           || soft_c1_residency_display(BIC_Avg_MHz)) {
+               int status = -1;
+
+               assert(!no_perf || !no_msr);
+
+               switch (amperf_source) {
+               case AMPERF_SOURCE_PERF:
+                       status = read_aperf_mperf_tsc_perf(t, cpu);
+                       break;
+               case AMPERF_SOURCE_MSR:
+                       status = read_aperf_mperf_tsc_msr(t, cpu);
+                       break;
+               }
+
+               if (status != 0)
+                       return status;
+       }
+
+       if (DO_BIC(BIC_IPC))
+               if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
+                       return -4;
+
+       if (DO_BIC(BIC_IRQ))
+               t->irq_count = irqs_per_cpu[cpu];
+       if (DO_BIC(BIC_SMI)) {
+               if (get_msr(cpu, MSR_SMI_COUNT, &msr))
+                       return -5;
+               t->smi_count = msr & 0xFFFFFFFF;
+       }
+       if (DO_BIC(BIC_CPU_c1) && platform->has_msr_core_c1_res) {
+               if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+                       return -6;
+       }
+
+       for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+               if (get_mp(cpu, mp, &t->counter[i]))
+                       return -10;
+       }
+
+       /* collect core counters only for 1st thread in core */
+       if (!is_cpu_first_thread_in_core(t, c, p))
+               goto done;
+
+       if (platform->has_per_core_rapl) {
+               status = get_rapl_counters(cpu, c->core_id, c, p);
+               if (status != 0)
+                       return status;
+       }
+
+       if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) {
+               if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
+                       return -6;
+       }
+
+       if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !platform->has_msr_knl_core_c6_residency) {
+               if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
+                       return -7;
+       } else if (platform->has_msr_knl_core_c6_residency && soft_c1_residency_display(BIC_CPU_c6)) {
+               if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
+                       return -7;
+       }
+
+       if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
+               if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
+                       return -8;
+               else if (t->is_atom) {
+                       /*
+                        * For Atom CPUs that have core C-states deeper than C6,
+                        * MSR_CORE_C6_RESIDENCY returns the residency of CC6 and deeper.
+                        * Subtract the CC7 (and deeper C-states) residency to get
+                        * an accurate CC6 residency.
+                        */
+                       c->c6 -= c->c7;
+               }
+       }
+
+       if (DO_BIC(BIC_Mod_c6))
+               if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
+                       return -8;
+
+       if (DO_BIC(BIC_CoreTmp)) {
+               if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+                       return -9;
+               c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
+       }
+
+       if (DO_BIC(BIC_CORE_THROT_CNT))
+               get_core_throt_cnt(cpu, &c->core_throt_cnt);
+
+       for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+               if (get_mp(cpu, mp, &c->counter[i]))
+                       return -10;
+       }
+
+       /* collect package counters only for 1st core in package */
+       if (!is_cpu_first_core_in_package(t, c, p))
+               goto done;
+
+       if (DO_BIC(BIC_Totl_c0)) {
+               if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
+                       return -10;
+       }
+       if (DO_BIC(BIC_Any_c0)) {
+               if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
+                       return -11;
+       }
+       if (DO_BIC(BIC_GFX_c0)) {
+               if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
+                       return -12;
+       }
+       if (DO_BIC(BIC_CPUGFX)) {
+               if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
+                       return -13;
+       }
+       if (DO_BIC(BIC_Pkgpc3))
+               if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
+                       return -9;
+       if (DO_BIC(BIC_Pkgpc6)) {
                if (platform->has_msr_atom_pkg_c6_residency) {
                        if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
                                return -10;
@@ -2911,59 +3618,39 @@ retry:
        if (DO_BIC(BIC_SYS_LPI))
                p->sys_lpi = cpuidle_cur_sys_lpi_us;
 
-       if (platform->rapl_msrs & RAPL_PKG) {
-               if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
-                       return -13;
-               p->energy_pkg = msr;
-       }
-       if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS) {
-               if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
-                       return -14;
-               p->energy_cores = msr;
-       }
-       if (platform->rapl_msrs & RAPL_DRAM) {
-               if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
-                       return -15;
-               p->energy_dram = msr;
-       }
-       if (platform->rapl_msrs & RAPL_GFX) {
-               if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
-                       return -16;
-               p->energy_gfx = msr;
-       }
-       if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS) {
-               if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
-                       return -16;
-               p->rapl_pkg_perf_status = msr;
-       }
-       if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS) {
-               if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
-                       return -16;
-               p->rapl_dram_perf_status = msr;
-       }
-       if (platform->rapl_msrs & RAPL_AMD_F17H) {
-               if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
-                       return -13;
-               p->energy_pkg = msr;
+       if (!platform->has_per_core_rapl) {
+               status = get_rapl_counters(cpu, p->package_id, c, p);
+               if (status != 0)
+                       return status;
        }
+
        if (DO_BIC(BIC_PkgTmp)) {
                if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
                        return -17;
                p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
        }
 
-       if (DO_BIC(BIC_GFX_rc6))
-               p->gfx_rc6_ms = gfx_cur_rc6_ms;
-
        /* n.b. assume die0 uncore frequency applies to whole package */
        if (DO_BIC(BIC_UNCORE_MHZ))
                p->uncore_mhz = get_uncore_mhz(p->package_id, 0);
 
+       if (DO_BIC(BIC_GFX_rc6))
+               p->gfx_rc6_ms = gfx_info[GFX_rc6].val_ull;
+
        if (DO_BIC(BIC_GFXMHz))
-               p->gfx_mhz = gfx_cur_mhz;
+               p->gfx_mhz = gfx_info[GFX_MHz].val;
 
        if (DO_BIC(BIC_GFXACTMHz))
-               p->gfx_act_mhz = gfx_act_mhz;
+               p->gfx_act_mhz = gfx_info[GFX_ACTMHz].val;
+
+       if (DO_BIC(BIC_SAM_mc6))
+               p->sam_mc6_ms = gfx_info[SAM_mc6].val_ull;
+
+       if (DO_BIC(BIC_SAMMHz))
+               p->sam_mhz = gfx_info[SAM_MHz].val;
+
+       if (DO_BIC(BIC_SAMACTMHz))
+               p->sam_act_mhz = gfx_info[SAM_ACTMHz].val;
 
        for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
                if (get_mp(cpu, mp, &p->counter[i]))
@@ -3053,7 +3740,7 @@ void probe_cst_limit(void)
        unsigned long long msr;
        int *pkg_cstate_limits;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        switch (platform->cst_limit) {
@@ -3097,7 +3784,7 @@ static void dump_platform_info(void)
        unsigned long long msr;
        unsigned int ratio;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
@@ -3115,7 +3802,7 @@ static void dump_power_ctl(void)
 {
        unsigned long long msr;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
@@ -3321,7 +4008,7 @@ static void dump_cst_cfg(void)
 {
        unsigned long long msr;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
@@ -3393,7 +4080,7 @@ void print_irtl(void)
 {
        unsigned long long msr;
 
-       if (!platform->has_irtl_msrs)
+       if (!platform->has_irtl_msrs || no_msr)
                return;
 
        if (platform->supported_cstates & PC3) {
@@ -3443,12 +4130,64 @@ void free_fd_percpu(void)
 {
        int i;
 
+       if (!fd_percpu)
+               return;
+
        for (i = 0; i < topo.max_cpu_num + 1; ++i) {
                if (fd_percpu[i] != 0)
                        close(fd_percpu[i]);
        }
 
        free(fd_percpu);
+       fd_percpu = NULL;
+}
+
+/*
+ * Close every non-zero per-cpu APERF/MPERF descriptor, then free the
+ * table and reset its pointer.  Does nothing if the table is not allocated.
+ */
+void free_fd_amperf_percpu(void)
+{
+       int cpu;
+
+       if (fd_amperf_percpu == NULL)
+               return;
+
+       for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
+               /* mperf first, then aperf (the group descriptor) */
+               if (fd_amperf_percpu[cpu].mperf)
+                       close(fd_amperf_percpu[cpu].mperf);
+
+               if (fd_amperf_percpu[cpu].aperf)
+                       close(fd_amperf_percpu[cpu].aperf);
+       }
+
+       free(fd_amperf_percpu);
+       fd_amperf_percpu = NULL;
+}
+
+/*
+ * Close every non-zero per-cpu instruction-count descriptor, then free the
+ * table and reset its pointer.  Does nothing if the table is not allocated.
+ */
+void free_fd_instr_count_percpu(void)
+{
+       int cpu;
+
+       if (fd_instr_count_percpu == NULL)
+               return;
+
+       for (cpu = 0; cpu <= topo.max_cpu_num; cpu++) {
+               if (fd_instr_count_percpu[cpu])
+                       close(fd_instr_count_percpu[cpu]);
+       }
+
+       free(fd_instr_count_percpu);
+       fd_instr_count_percpu = NULL;
+}
+
+/*
+ * Close the perf descriptor of every RAPL domain (one per core or one per
+ * package, depending on platform->has_per_core_rapl), then free the
+ * per-domain table and reset its pointer.  Does nothing if the table is
+ * not allocated.
+ */
+void free_fd_rapl_percpu(void)
+{
+       if (!rapl_counter_info_perdomain)
+               return;
+
+       const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+
+       for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+               if (rapl_counter_info_perdomain[domain_id].fd_perf != -1)
+                       close(rapl_counter_info_perdomain[domain_id].fd_perf);
+       }
+
+       free(rapl_counter_info_perdomain);
+       /*
+        * Reset the pointer like the other free_fd_*() helpers do, so that a
+        * second call cannot close and free the stale table again.
+        */
+       rapl_counter_info_perdomain = NULL;
 }
 
 void free_all_buffers(void)
@@ -3492,6 +4231,9 @@ void free_all_buffers(void)
        outp = NULL;
 
        free_fd_percpu();
+       free_fd_instr_count_percpu();
+       free_fd_amperf_percpu();
+       free_fd_rapl_percpu();
 
        free(irq_column_2_cpu);
        free(irqs_per_cpu);
@@ -3825,11 +4567,17 @@ static void update_effective_set(bool startup)
                err(1, "%s: cpu str malformat %s\n", PATH_EFFECTIVE_CPUS, cpu_effective_str);
 }
 
+void linux_perf_init(void);
+void rapl_perf_init(void);
+
+/*
+ * Tear down and rebuild runtime state: calls free_all_buffers() and
+ * setup_all_buffers(false), re-runs linux_perf_init() and rapl_perf_init(),
+ * then logs the resulting cpu counts to outf.
+ */
 void re_initialize(void)
 {
        free_all_buffers();
        setup_all_buffers(false);
-       fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus);
+       linux_perf_init();
+       rapl_perf_init();
+       fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus,
+               topo.allowed_cpus);
 }
 
 void set_max_cpu_num(void)
@@ -3940,85 +4688,43 @@ int snapshot_proc_interrupts(void)
 }
 
 /*
- * snapshot_gfx_rc6_ms()
+ * snapshot_graphics()
  *
- * record snapshot of
- * /sys/class/drm/card0/power/rc6_residency_ms
+ * record snapshot of specified graphics sysfs knob
  *
  * return 1 if config change requires a restart, else return 0
  */
-int snapshot_gfx_rc6_ms(void)
+int snapshot_graphics(int idx)
 {
        FILE *fp;
        int retval;
 
-       fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
-
-       retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
-       if (retval != 1)
-               err(1, "GFX rc6");
-
-       fclose(fp);
-
-       return 0;
-}
-
-/*
- * snapshot_gfx_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
- * when /sys/class/drm/card0/gt_cur_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_mhz(void)
-{
-       static FILE *fp;
-       int retval;
-
-       if (fp == NULL) {
-               fp = fopen("/sys/class/drm/card0/gt_cur_freq_mhz", "r");
-               if (!fp)
-                       fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
-       } else {
-               rewind(fp);
-               fflush(fp);
-       }
-
-       retval = fscanf(fp, "%d", &gfx_cur_mhz);
-       if (retval != 1)
-               err(1, "GFX MHz");
-
-       return 0;
-}
-
-/*
- * snapshot_gfx_cur_mhz()
- *
- * fall back to /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
- * when /sys/class/drm/card0/gt_act_freq_mhz is not available.
- *
- * return 1 if config change requires a restart, else return 0
- */
-int snapshot_gfx_act_mhz(void)
-{
-       static FILE *fp;
-       int retval;
-
-       if (fp == NULL) {
-               fp = fopen("/sys/class/drm/card0/gt_act_freq_mhz", "r");
-               if (!fp)
-                       fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
-       } else {
-               rewind(fp);
-               fflush(fp);
+       switch (idx) {
+       case GFX_rc6:
+       case SAM_mc6:
+               fp = fopen_or_die(gfx_info[idx].path, "r");
+               retval = fscanf(fp, "%lld", &gfx_info[idx].val_ull);
+               if (retval != 1)
+                       err(1, "rc6");
+               fclose(fp);
+               return 0;
+       case GFX_MHz:
+       case GFX_ACTMHz:
+       case SAM_MHz:
+       case SAM_ACTMHz:
+               if (gfx_info[idx].fp == NULL) {
+                       gfx_info[idx].fp = fopen_or_die(gfx_info[idx].path, "r");
+               } else {
+                       rewind(gfx_info[idx].fp);
+                       fflush(gfx_info[idx].fp);
+               }
+               retval = fscanf(gfx_info[idx].fp, "%d", &gfx_info[idx].val);
+               if (retval != 1)
+                       err(1, "MHz");
+               return 0;
+       default:
+               return -EINVAL;
        }
-
-       retval = fscanf(fp, "%d", &gfx_act_mhz);
-       if (retval != 1)
-               err(1, "GFX ACT MHz");
-
-       return 0;
 }
 
 /*
@@ -4083,13 +4789,22 @@ int snapshot_proc_sysfs_files(void)
                        return 1;
 
        if (DO_BIC(BIC_GFX_rc6))
-               snapshot_gfx_rc6_ms();
+               snapshot_graphics(GFX_rc6);
 
        if (DO_BIC(BIC_GFXMHz))
-               snapshot_gfx_mhz();
+               snapshot_graphics(GFX_MHz);
 
        if (DO_BIC(BIC_GFXACTMHz))
-               snapshot_gfx_act_mhz();
+               snapshot_graphics(GFX_ACTMHz);
+
+       if (DO_BIC(BIC_SAM_mc6))
+               snapshot_graphics(SAM_mc6);
+
+       if (DO_BIC(BIC_SAMMHz))
+               snapshot_graphics(SAM_MHz);
+
+       if (DO_BIC(BIC_SAMACTMHz))
+               snapshot_graphics(SAM_ACTMHz);
 
        if (DO_BIC(BIC_CPU_LPI))
                snapshot_cpu_lpi_us();
@@ -4173,6 +4888,8 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
        int ret, idx;
        unsigned long long msr_cur, msr_last;
 
+       assert(!no_msr);
+
        if (!per_cpu_msr_sum)
                return 1;
 
@@ -4201,6 +4918,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg
        UNUSED(c);
        UNUSED(p);
 
+       assert(!no_msr);
+
        for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
                unsigned long long msr_cur, msr_last;
                off_t offset;
@@ -4280,7 +4999,8 @@ release_msr:
 
 /*
  * set_my_sched_priority(pri)
- * return previous
+ * return previous priority on success
+ * return value < -20 on failure
  */
 int set_my_sched_priority(int priority)
 {
@@ -4290,16 +5010,16 @@ int set_my_sched_priority(int priority)
        errno = 0;
        original_priority = getpriority(PRIO_PROCESS, 0);
        if (errno && (original_priority == -1))
-               err(errno, "getpriority");
+               return -21;
 
        retval = setpriority(PRIO_PROCESS, 0, priority);
        if (retval)
-               errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
+               return -21;
 
        errno = 0;
        retval = getpriority(PRIO_PROCESS, 0);
        if (retval != priority)
-               err(retval, "getpriority(%d) != setpriority(%d)", retval, priority);
+               return -21;
 
        return original_priority;
 }
@@ -4314,6 +5034,9 @@ void turbostat_loop()
 
        /*
         * elevate own priority for interval mode
+        *
+        * ignore on error - we probably don't have permission to set it, but
+        * it's not a big deal
         */
        set_my_sched_priority(-20);
 
@@ -4399,10 +5122,13 @@ void check_dev_msr()
        struct stat sb;
        char pathname[32];
 
+       if (no_msr)
+               return;
+
        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
        if (stat(pathname, &sb))
                if (system("/sbin/modprobe msr > /dev/null 2>&1"))
-                       err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+                       no_msr = 1;
 }
 
 /*
@@ -4414,47 +5140,51 @@ int check_for_cap_sys_rawio(void)
 {
        cap_t caps;
        cap_flag_value_t cap_flag_value;
+       int ret = 0;
 
        caps = cap_get_proc();
        if (caps == NULL)
-               err(-6, "cap_get_proc\n");
+               return 1;
 
-       if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value))
-               err(-6, "cap_get\n");
+       if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) {
+               ret = 1;
+               goto free_and_exit;
+       }
 
        if (cap_flag_value != CAP_SET) {
-               warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
-               return 1;
+               ret = 1;
+               goto free_and_exit;
        }
 
+free_and_exit:
        if (cap_free(caps) == -1)
                err(-6, "cap_free\n");
 
-       return 0;
+       return ret;
 }
 
-void check_permissions(void)
+void check_msr_permission(void)
 {
-       int do_exit = 0;
+       int failed = 0;
        char pathname[32];
 
+       if (no_msr)
+               return;
+
        /* check for CAP_SYS_RAWIO */
-       do_exit += check_for_cap_sys_rawio();
+       failed += check_for_cap_sys_rawio();
 
        /* test file permissions */
        sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
        if (euidaccess(pathname, R_OK)) {
-               do_exit++;
-               warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+               failed++;
        }
 
-       /* if all else fails, thell them to be root */
-       if (do_exit)
-               if (getuid() != 0)
-                       warnx("... or simply run as root");
-
-       if (do_exit)
-               exit(-6);
+       if (failed) {
+               warnx("Failed to access %s. Some of the counters may not be available\n"
+                     "\tRun as root to enable them or use %s to disable the access explicitly", pathname, "--no-msr");
+               no_msr = 1;
+       }
 }
 
 void probe_bclk(void)
@@ -4462,7 +5192,7 @@ void probe_bclk(void)
        unsigned long long msr;
        unsigned int base_ratio;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        if (platform->bclk_freq == BCLK_100MHZ)
@@ -4502,7 +5232,7 @@ static void dump_turbo_ratio_info(void)
        if (!has_turbo)
                return;
 
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                return;
 
        if (platform->trl_msrs & TRL_LIMIT2)
@@ -4567,20 +5297,15 @@ static void dump_sysfs_file(char *path)
 static void probe_intel_uncore_frequency(void)
 {
        int i, j;
-       char path[128];
+       char path[256];
 
        if (!genuine_intel)
                return;
 
-       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK))
-               return;
-
-       /* Cluster level sysfs not supported yet. */
-       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00", R_OK))
-               return;
+       if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
+               goto probe_cluster;
 
-       if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK))
-               BIC_PRESENT(BIC_UNCORE_MHZ);
+       BIC_PRESENT(BIC_UNCORE_MHZ);
 
        if (quiet)
                return;
@@ -4588,40 +5313,178 @@ static void probe_intel_uncore_frequency(void)
        for (i = 0; i < topo.num_packages; ++i) {
                for (j = 0; j < topo.num_die; ++j) {
                        int k, l;
+                       char path_base[128];
 
-                       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz",
-                               i, j);
+                       sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/package_%02d_die_%02d", i,
+                               j);
+
+                       sprintf(path, "%s/min_freq_khz", path_base);
                        k = read_sysfs_int(path);
-                       sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/max_freq_khz", path_base);
                        l = read_sysfs_int(path);
-                       fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
+                       fprintf(outf, "Uncore Frequency package%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000);
 
-                       sprintf(path,
-                               "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/initial_min_freq_khz", path_base);
                        k = read_sysfs_int(path);
-                       sprintf(path,
-                               "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz",
-                               i, j);
+                       sprintf(path, "%s/initial_max_freq_khz", path_base);
                        l = read_sysfs_int(path);
-                       fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000);
+                       fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+                       sprintf(path, "%s/current_freq_khz", path_base);
+                       k = read_sysfs_int(path);
+                       fprintf(outf, " %d MHz\n", k / 1000);
                }
        }
+       return;
+
+probe_cluster:
+       if (access("/sys/devices/system/cpu/intel_uncore_frequency/uncore00/current_freq_khz", R_OK))
+               return;
+
+       if (quiet)
+               return;
+
+       for (i = 0;; ++i) {
+               int k, l;
+               char path_base[128];
+               int package_id, domain_id, cluster_id;
+
+               sprintf(path_base, "/sys/devices/system/cpu/intel_uncore_frequency/uncore%02d", i);
+
+               if (access(path_base, R_OK))
+                       break;
+
+               sprintf(path, "%s/package_id", path_base);
+               package_id = read_sysfs_int(path);
+
+               sprintf(path, "%s/domain_id", path_base);
+               domain_id = read_sysfs_int(path);
+
+               sprintf(path, "%s/fabric_cluster_id", path_base);
+               cluster_id = read_sysfs_int(path);
+
+               sprintf(path, "%s/min_freq_khz", path_base);
+               k = read_sysfs_int(path);
+               sprintf(path, "%s/max_freq_khz", path_base);
+               l = read_sysfs_int(path);
+               fprintf(outf, "Uncore Frequency package%d domain%d cluster%d: %d - %d MHz ", package_id, domain_id,
+                       cluster_id, k / 1000, l / 1000);
+
+               sprintf(path, "%s/initial_min_freq_khz", path_base);
+               k = read_sysfs_int(path);
+               sprintf(path, "%s/initial_max_freq_khz", path_base);
+               l = read_sysfs_int(path);
+               fprintf(outf, "(%d - %d MHz)", k / 1000, l / 1000);
+
+               sprintf(path, "%s/current_freq_khz", path_base);
+               k = read_sysfs_int(path);
+               fprintf(outf, " %d MHz\n", k / 1000);
+       }
 }
 
 static void probe_graphics(void)
 {
+       /* Xe graphics sysfs knobs */
+       if (!access("/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms", R_OK)) {
+               FILE *fp;
+               char buf[8];
+               bool gt0_is_gt;
+               int idx;
+
+               fp = fopen("/sys/class/drm/card0/device/tile0/gt0/gtidle/name", "r");
+               if (!fp)
+                       goto next;
+
+               if (!fread(buf, sizeof(char), 7, fp)) {
+                       fclose(fp);
+                       goto next;
+               }
+               fclose(fp);
+
+               if (!strncmp(buf, "gt0-rc", strlen("gt0-rc")))
+                       gt0_is_gt = true;
+               else if (!strncmp(buf, "gt0-mc", strlen("gt0-mc")))
+                       gt0_is_gt = false;
+               else
+                       goto next;
+
+               idx = gt0_is_gt ? GFX_rc6 : SAM_mc6;
+               gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/gtidle/idle_residency_ms";
+
+               idx = gt0_is_gt ? GFX_MHz : SAM_MHz;
+               if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/cur_freq";
+
+               idx = gt0_is_gt ? GFX_ACTMHz : SAM_ACTMHz;
+               if (!access("/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt0/freq0/act_freq";
+
+               idx = gt0_is_gt ? SAM_mc6 : GFX_rc6;
+               if (!access("/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/gtidle/idle_residency_ms";
+
+               idx = gt0_is_gt ? SAM_MHz : GFX_MHz;
+               if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/cur_freq";
+
+               idx = gt0_is_gt ? SAM_ACTMHz : GFX_ACTMHz;
+               if (!access("/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq", R_OK))
+                       gfx_info[idx].path = "/sys/class/drm/card0/device/tile0/gt1/freq0/act_freq";
+
+               goto end;
+       }
+
+next:
+       /* New i915 graphics sysfs knobs */
+       if (!access("/sys/class/drm/card0/gt/gt0/rc6_residency_ms", R_OK)) {
+               gfx_info[GFX_rc6].path = "/sys/class/drm/card0/gt/gt0/rc6_residency_ms";
+
+               if (!access("/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz", R_OK))
+                       gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt/gt0/rps_cur_freq_mhz";
+
+               if (!access("/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz", R_OK))
+                       gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt/gt0/rps_act_freq_mhz";
+
+               if (!access("/sys/class/drm/card0/gt/gt1/rc6_residency_ms", R_OK))
+                       gfx_info[SAM_mc6].path = "/sys/class/drm/card0/gt/gt1/rc6_residency_ms";
+
+               if (!access("/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz", R_OK))
+                       gfx_info[SAM_MHz].path = "/sys/class/drm/card0/gt/gt1/rps_cur_freq_mhz";
+
+               if (!access("/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz", R_OK))
+                       gfx_info[SAM_ACTMHz].path = "/sys/class/drm/card0/gt/gt1/rps_act_freq_mhz";
+
+               goto end;
+       }
+
+       /* Fall back to traditional i915 graphics sysfs knobs */
        if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
-               BIC_PRESENT(BIC_GFX_rc6);
+               gfx_info[GFX_rc6].path = "/sys/class/drm/card0/power/rc6_residency_ms";
+
+       if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK))
+               gfx_info[GFX_MHz].path = "/sys/class/drm/card0/gt_cur_freq_mhz";
+       else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+               gfx_info[GFX_MHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz";
 
-       if (!access("/sys/class/drm/card0/gt_cur_freq_mhz", R_OK) ||
-           !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
-               BIC_PRESENT(BIC_GFXMHz);
 
-       if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK) ||
-           !access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+       if (!access("/sys/class/drm/card0/gt_act_freq_mhz", R_OK))
+               gfx_info[GFX_ACTMHz].path = "/sys/class/drm/card0/gt_act_freq_mhz";
+       else if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+               gfx_info[GFX_ACTMHz].path = "/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz";
+
+end:
+       if (gfx_info[GFX_rc6].path)
+               BIC_PRESENT(BIC_GFX_rc6);
+       if (gfx_info[GFX_MHz].path)
+               BIC_PRESENT(BIC_GFXMHz);
+       if (gfx_info[GFX_ACTMHz].path)
                BIC_PRESENT(BIC_GFXACTMHz);
+       if (gfx_info[SAM_mc6].path)
+               BIC_PRESENT(BIC_SAM_mc6);
+       if (gfx_info[SAM_MHz].path)
+               BIC_PRESENT(BIC_SAMMHz);
+       if (gfx_info[SAM_ACTMHz].path)
+               BIC_PRESENT(BIC_SAMACTMHz);
 }
 
 static void dump_sysfs_cstate_config(void)
@@ -4783,6 +5646,9 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        UNUSED(c);
        UNUSED(p);
 
+       if (no_msr)
+               return 0;
+
        if (!has_hwp)
                return 0;
 
@@ -4869,6 +5735,9 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
        UNUSED(c);
        UNUSED(p);
 
+       if (no_msr)
+               return 0;
+
        cpu = t->cpu_id;
 
        /* per-package */
@@ -4983,31 +5852,18 @@ void rapl_probe_intel(void)
        unsigned long long msr;
        unsigned int time_unit;
        double tdp;
+       const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt | BIC_RAMWatt | BIC_GFXWatt;
+       const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J | BIC_RAM_J | BIC_GFX_J;
 
-       if (rapl_joules) {
-               if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_Pkg_J);
-               if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_Cor_J);
-               if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_RAM_J);
-               if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_GFX_J);
-       } else {
-               if (platform->rapl_msrs & RAPL_PKG_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_PkgWatt);
-               if (platform->rapl_msrs & RAPL_CORE_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_CorWatt);
-               if (platform->rapl_msrs & RAPL_DRAM_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_RAMWatt);
-               if (platform->rapl_msrs & RAPL_GFX_ENERGY_STATUS)
-                       BIC_PRESENT(BIC_GFXWatt);
-       }
+       if (rapl_joules)
+               bic_enabled &= ~bic_watt_bits;
+       else
+               bic_enabled &= ~bic_joules_bits;
 
-       if (platform->rapl_msrs & RAPL_PKG_PERF_STATUS)
-               BIC_PRESENT(BIC_PKG__);
-       if (platform->rapl_msrs & RAPL_DRAM_PERF_STATUS)
-               BIC_PRESENT(BIC_RAM__);
+       if (!(platform->rapl_msrs & RAPL_PKG_PERF_STATUS))
+               bic_enabled &= ~BIC_PKG__;
+       if (!(platform->rapl_msrs & RAPL_DRAM_PERF_STATUS))
+               bic_enabled &= ~BIC_RAM__;
 
        /* units on package 0, verify later other packages match */
        if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
@@ -5041,14 +5897,13 @@ void rapl_probe_amd(void)
 {
        unsigned long long msr;
        double tdp;
+       const unsigned long long bic_watt_bits = BIC_PkgWatt | BIC_CorWatt;
+       const unsigned long long bic_joules_bits = BIC_Pkg_J | BIC_Cor_J;
 
-       if (rapl_joules) {
-               BIC_PRESENT(BIC_Pkg_J);
-               BIC_PRESENT(BIC_Cor_J);
-       } else {
-               BIC_PRESENT(BIC_PkgWatt);
-               BIC_PRESENT(BIC_CorWatt);
-       }
+       if (rapl_joules)
+               bic_enabled &= ~bic_watt_bits;
+       else
+               bic_enabled &= ~bic_joules_bits;
 
        if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
                return;
@@ -5202,7 +6057,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
  */
 void probe_rapl(void)
 {
-       if (!platform->rapl_msrs)
+       if (!platform->rapl_msrs || no_msr)
                return;
 
        if (genuine_intel)
@@ -5258,7 +6113,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
        }
 
        /* Temperature Target MSR is Nehalem and newer only */
-       if (!platform->has_nhm_msrs)
+       if (!platform->has_nhm_msrs || no_msr)
                goto guess;
 
        if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
@@ -5305,6 +6160,9 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
        UNUSED(c);
        UNUSED(p);
 
+       if (no_msr)
+               return 0;
+
        if (!(do_dts || do_ptm))
                return 0;
 
@@ -5402,6 +6260,9 @@ void decode_feature_control_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
                fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
                        base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
@@ -5411,6 +6272,9 @@ void decode_misc_enable_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!genuine_intel)
                return;
 
@@ -5428,6 +6292,9 @@ void decode_misc_feature_control(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!platform->has_msr_misc_feature_control)
                return;
 
@@ -5449,6 +6316,9 @@ void decode_misc_pwr_mgmt_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!platform->has_msr_misc_pwr_mgmt)
                return;
 
@@ -5468,6 +6338,9 @@ void decode_c6_demotion_policy_msr(void)
 {
        unsigned long long msr;
 
+       if (no_msr)
+               return;
+
        if (!platform->has_msr_c6_demotion_policy_config)
                return;
 
@@ -5489,7 +6362,8 @@ void print_dev_latency(void)
 
        fd = open(path, O_RDONLY);
        if (fd < 0) {
-               warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
+               if (debug)
+                       warnx("Read %s failed", path);
                return;
        }
 
@@ -5504,23 +6378,260 @@ void print_dev_latency(void)
        close(fd);
 }
 
+/*
+ * Probe whether the instructions-retired perf counter can be opened
+ * on base_cpu.
+ *
+ * return 1 when the probe descriptor could be opened (it is closed again)
+ * return 0 when no_perf is set, or when the open failed (a warning is printed)
+ */
+static int has_instr_count_access(void)
+{
+       int probe_fd;
+
+       if (no_perf)
+               return 0;
+
+       probe_fd = open_perf_counter(base_cpu, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1, 0);
+       if (probe_fd == -1) {
+               warnx("Failed to access %s. Some of the counters may not be available\n"
+                     "\tRun as root to enable them or use %s to disable the access explicitly",
+                     "instructions retired perf counter", "--no-perf");
+               return 0;
+       }
+
+       /* the descriptor was only needed for the access check */
+       close(probe_fd);
+
+       return 1;
+}
+
+/*
+ * Report whether at least one of the built-in counters tested here
+ * (Avg_MHz, Busy, Bzy_MHz, IPC) is enabled.
+ */
+bool is_aperf_access_required(void)
+{
+       if (BIC_IS_ENABLED(BIC_Avg_MHz))
+               return true;
+
+       if (BIC_IS_ENABLED(BIC_Busy))
+               return true;
+
+       if (BIC_IS_ENABLED(BIC_Bzy_MHz))
+               return true;
+
+       if (BIC_IS_ENABLED(BIC_IPC))
+               return true;
+
+       return false;
+}
+
+/*
+ * Open the perf counter described by cai on the given cpu and attach it
+ * to rci.
+ *
+ * return the new counter descriptor on success; *scale_ and *unit_ are
+ *        filled in only in that case
+ * return -1 when no_perf is set, when the scale reads back as 0.0, when the
+ *        unit is RAPL_UNIT_INVALID, or when the counter cannot be opened
+ */
+int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+                          double *scale_, enum rapl_unit *unit_)
+{
+       double perf_scale;
+       enum rapl_unit perf_unit;
+       unsigned int pmu_type, event_config;
+       int fd;
+
+       if (no_perf)
+               return -1;
+
+       perf_scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name);
+       if (perf_scale == 0.0)
+               return -1;
+
+       perf_unit = read_perf_rapl_unit(cai->perf_subsys, cai->perf_name);
+       if (perf_unit == RAPL_UNIT_INVALID)
+               return -1;
+
+       pmu_type = read_perf_type(cai->perf_subsys);
+       event_config = read_rapl_config(cai->perf_subsys, cai->perf_name);
+
+       fd = open_perf_counter(cpu, pmu_type, event_config, rci->fd_perf, PERF_FORMAT_GROUP);
+       if (fd == -1)
+               return -1;
+
+       /* The first counter opened for this rci becomes its group descriptor */
+       if (rci->fd_perf == -1)
+               rci->fd_perf = fd;
+
+       *scale_ = perf_scale;
+       *unit_ = perf_unit;
+
+       return fd;
+}
+
+/*
+ * Wrapper around add_rapl_perf_counter_() that, when debug is set,
+ * also prints the result and the cpu to stderr.
+ *
+ * return whatever add_rapl_perf_counter_() returned
+ */
+int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai,
+                         double *scale, enum rapl_unit *unit)
+{
+       const int fd = add_rapl_perf_counter_(cpu, rci, cai, scale, unit);
+
+       if (!debug)
+               return fd;
+
+       fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, fd, cpu);
+
+       return fd;
+}
+
 /*
  * Linux-perf manages the HW instructions-retired counter
  * by enabling when requested, and hiding rollover
  */
 void linux_perf_init(void)
 {
-       if (!BIC_IS_ENABLED(BIC_IPC))
-               return;
-
        if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
                return;
 
-       fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
-       if (fd_instr_count_percpu == NULL)
-               err(-1, "calloc fd_instr_count_percpu");
+       if (BIC_IS_ENABLED(BIC_IPC) && has_aperf) {
+               fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+               if (fd_instr_count_percpu == NULL)
+                       err(-1, "calloc fd_instr_count_percpu");
+       }
+
+       const bool aperf_required = is_aperf_access_required();
+
+       if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) {
+               fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu));
+               if (fd_amperf_percpu == NULL)
+                       err(-1, "calloc fd_amperf_percpu");
+       }
+}
+
+/*
+ * rapl_perf_init()
+ *
+ * Allocate rapl_counter_info_perdomain[] (one entry per core or per package,
+ * depending on platform->has_per_core_rapl) and, for every entry of
+ * rapl_counter_arch_infos[], pick a source for that counter in each domain:
+ * perf first, MSR as the fallback.  A counter that ends up with a source in
+ * at least one domain is marked present via BIC_PRESENT().
+ *
+ * Exits via err() when either allocation fails.
+ */
+void rapl_perf_init(void)
+{
+       const int num_domains = platform->has_per_core_rapl ? topo.num_cores : topo.num_packages;
+       bool *domain_visited = calloc(num_domains, sizeof(bool));
+
+       /* domain_visited is passed to memset() and indexed below, it must not be NULL */
+       if (domain_visited == NULL)
+               err(-1, "calloc domain_visited");
+
+       rapl_counter_info_perdomain = calloc(num_domains, sizeof(*rapl_counter_info_perdomain));
+       if (rapl_counter_info_perdomain == NULL)
+               err(-1, "calloc rapl_counter_info_percpu");
+
+       /*
+        * Initialize rapl_counter_info_perdomain
+        */
+       for (int domain_id = 0; domain_id < num_domains; ++domain_id) {
+               struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[domain_id];
+
+               rci->fd_perf = -1;
+               for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) {
+                       rci->data[i] = 0;
+                       rci->source[i] = RAPL_SOURCE_NONE;
+               }
+       }
+
+       /*
+        * Open/probe the counters
+        * If a counter can't be accessed via perf, fall back to MSR
+        */
+       for (size_t i = 0; i < ARRAY_SIZE(rapl_counter_arch_infos); ++i) {
+
+               const struct rapl_counter_arch_info *const cai = &rapl_counter_arch_infos[i];
+               bool has_counter = 0;
+               double scale;
+               enum rapl_unit unit;
+               int next_domain;
+
+               /* every counter starts with a fresh "visited" map */
+               memset(domain_visited, 0, num_domains * sizeof(*domain_visited));
+
+               for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) {
+
+                       if (cpu_is_not_allowed(cpu))
+                               continue;
+
+                       /* Skip already seen and handled RAPL domains */
+                       next_domain =
+                           platform->has_per_core_rapl ? cpus[cpu].physical_core_id : cpus[cpu].physical_package_id;
+
+                       /*
+                        * NOTE(review): next_domain indexes arrays of num_domains
+                        * entries; this assumes core/package ids are always
+                        * smaller than num_domains - confirm
+                        */
+                       if (domain_visited[next_domain])
+                               continue;
+
+                       domain_visited[next_domain] = 1;
+
+                       struct rapl_counter_info_t *rci = &rapl_counter_info_perdomain[next_domain];
+
+                       /* Check if the counter is enabled and accessible */
+                       if (BIC_IS_ENABLED(cai->bic) && (platform->rapl_msrs & cai->feature_mask)) {
+
+                               /* Use perf API for this counter */
+                               if (!no_perf && cai->perf_name
+                                   && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) {
+                                       rci->source[cai->rci_index] = RAPL_SOURCE_PERF;
+                                       rci->scale[cai->rci_index] = scale * cai->compat_scale;
+                                       rci->unit[cai->rci_index] = unit;
+                                       rci->flags[cai->rci_index] = cai->flags;
+
+                                       /* Use MSR for this counter */
+                               } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) {
+                                       rci->source[cai->rci_index] = RAPL_SOURCE_MSR;
+                                       rci->msr[cai->rci_index] = cai->msr;
+                                       rci->msr_mask[cai->rci_index] = cai->msr_mask;
+                                       rci->msr_shift[cai->rci_index] = cai->msr_shift;
+                                       rci->unit[cai->rci_index] = RAPL_UNIT_JOULES;
+                                       rci->scale[cai->rci_index] = *cai->platform_rapl_msr_scale * cai->compat_scale;
+                                       rci->flags[cai->rci_index] = cai->flags;
+                               }
+                       }
+
+                       if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE)
+                               has_counter = 1;
+               }
+
+               /* If any CPU has access to the counter, make it present */
+               if (has_counter)
+                       BIC_PRESENT(cai->bic);
+       }
+
+       free(domain_visited);
+}
+
+/*
+ * return 1 when both MSR_IA32_APERF and MSR_IA32_MPERF can be probed
+ *          on base_cpu
+ * return 0 when no_msr is set or when either probe fails
+ */
+static int has_amperf_access_via_msr(void)
+{
+       if (no_msr)
+               return 0;
+
+       /* MPERF is probed only after the APERF probe returned 0 */
+       return !probe_msr(base_cpu, MSR_IA32_APERF) && !probe_msr(base_cpu, MSR_IA32_MPERF);
+}
+
+/*
+ * return 1 when both the APERF and the MPERF perf counters could be opened
+ *          on base_cpu
+ * return 0 when no_perf is set or when either counter could not be opened
+ *
+ * The counters are opened only on the first call that gets past the
+ * no_perf check; later calls reuse the remembered answer.
+ */
+static int has_amperf_access_via_perf(void)
+{
+       /*
+        * Outcome of the first probe, remembered so the user is not
+        * warned multiple times:
+        *
+        *   negative - probed, no access
+        *   zero     - not probed yet
+        *   positive - probed, has access
+        */
+       static int has_access_cached;
+       struct amperf_group_fd fds;
+       int aperf_ok, mperf_ok;
+
+       if (no_perf)
+               return 0;
+
+       if (has_access_cached)
+               return has_access_cached > 0;
+
+       fds = open_amperf_fd(base_cpu);
+       aperf_ok = fds.aperf != -1;
+       mperf_ok = fds.mperf != -1;
+
+       if (aperf_ok)
+               close(fds.aperf);
+       else
+               warnx("Failed to access %s. Some of the counters may not be available\n"
+                     "\tRun as root to enable them or use %s to disable the access explicitly",
+                     "APERF perf counter", "--no-perf");
+
+       if (mperf_ok)
+               close(fds.mperf);
+       else
+               warnx("Failed to access %s. Some of the counters may not be available\n"
+                     "\tRun as root to enable them or use %s to disable the access explicitly",
+                     "MPERF perf counter", "--no-perf");
+
+       has_access_cached = (aperf_ok && mperf_ok) ? 1 : -1;
+
+       return has_access_cached > 0;
+}
+
+/*
+ * Check if we can access APERF and MPERF
+ *
+ * return 0 right away when is_aperf_access_required() is false,
+ * otherwise try the MSR path first and the perf path second.
+ */
+static int has_amperf_access(void)
+{
+       int accessible = 0;
+
+       if (is_aperf_access_required()) {
+               if (!no_msr && has_amperf_access_via_msr())
+                       accessible = 1;
+               else if (!no_perf && has_amperf_access_via_perf())
+                       accessible = 1;
+       }
+
-       BIC_PRESENT(BIC_IPC);
+       return accessible;
 }
 
 void probe_cstates(void)
@@ -5563,7 +6674,7 @@ void probe_cstates(void)
        if (platform->has_msr_module_c6_res_ms)
                BIC_PRESENT(BIC_Mod_c6);
 
-       if (platform->has_ext_cst_msrs) {
+       if (platform->has_ext_cst_msrs && !no_msr) {
                BIC_PRESENT(BIC_Totl_c0);
                BIC_PRESENT(BIC_Any_c0);
                BIC_PRESENT(BIC_GFX_c0);
@@ -5623,6 +6734,7 @@ void process_cpuid()
        unsigned int eax, ebx, ecx, edx;
        unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
        unsigned long long ucode_patch = 0;
+       bool ucode_patch_valid = false;
 
        eax = ebx = ecx = edx = 0;
 
@@ -5650,8 +6762,12 @@ void process_cpuid()
        ecx_flags = ecx;
        edx_flags = edx;
 
-       if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
-               warnx("get_msr(UCODE)");
+       if (!no_msr) {
+               if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+                       warnx("get_msr(UCODE)");
+               else
+                       ucode_patch_valid = true;
+       }
 
        /*
         * check max extended function levels of CPUID.
@@ -5662,9 +6778,12 @@ void process_cpuid()
        __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
 
        if (!quiet) {
-               fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
-                       family, model, stepping, family, model, stepping,
-                       (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+               fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d)",
+                       family, model, stepping, family, model, stepping);
+               if (ucode_patch_valid)
+                       fprintf(outf, " microcode 0x%x", (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+               fputc('\n', outf);
+
                fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
                fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
                        ecx_flags & (1 << 0) ? "SSE3" : "-",
@@ -5700,10 +6819,11 @@ void process_cpuid()
 
        __cpuid(0x6, eax, ebx, ecx, edx);
        has_aperf = ecx & (1 << 0);
-       if (has_aperf) {
+       if (has_aperf && has_amperf_access()) {
                BIC_PRESENT(BIC_Avg_MHz);
                BIC_PRESENT(BIC_Busy);
                BIC_PRESENT(BIC_Bzy_MHz);
+               BIC_PRESENT(BIC_IPC);
        }
        do_dts = eax & (1 << 0);
        if (do_dts)
@@ -5786,6 +6906,15 @@ void process_cpuid()
                base_mhz = max_mhz = bus_mhz = edx = 0;
 
                __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+
+               bclk = bus_mhz;
+
+               base_hz = base_mhz * 1000000;
+               has_base_hz = 1;
+
+               if (platform->enable_tsc_tweak)
+                       tsc_tweak = base_hz / tsc_hz;
+
                if (!quiet)
                        fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
                                base_mhz, max_mhz, bus_mhz);
@@ -5814,7 +6943,7 @@ void probe_pm_features(void)
 
        probe_thermal();
 
-       if (platform->has_nhm_msrs)
+       if (platform->has_nhm_msrs && !no_msr)
                BIC_PRESENT(BIC_SMI);
 
        if (!quiet)
@@ -6142,6 +7271,7 @@ void topology_update(void)
        topo.allowed_packages = 0;
        for_all_cpus(update_topo, ODD_COUNTERS);
 }
+
 void setup_all_buffers(bool startup)
 {
        topology_probe(startup);
@@ -6169,21 +7299,129 @@ void set_base_cpu(void)
        err(-ENODEV, "No valid cpus found");
 }
 
+/*
+ * Select where APERF/MPERF are read from.
+ *
+ * perf stays selected only when is_aperf_access_required() holds, perf is not
+ * disabled through no_perf, and has_amperf_access_via_perf() succeeds; in
+ * every other case the MSR source is selected.
+ */
+static void set_amperf_source(void)
+{
+       bool use_perf;
+
+       amperf_source = AMPERF_SOURCE_PERF;
+
+       use_perf = is_aperf_access_required() && !no_perf && has_amperf_access_via_perf();
+       if (!use_perf)
+               amperf_source = AMPERF_SOURCE_MSR;
+
+       /* The choice is only reported in non-quiet debug runs. */
+       if (!quiet && debug)
+               fprintf(outf, "aperf/mperf source preference: %s\n",
+                       amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf");
+}
+
+/*
+ * Tell whether sys records any added core, thread or package counter.
+ *
+ * Only meaningful once the command line has been parsed; before that the
+ * sys structure is not populated.
+ */
+bool has_added_counters(void)
+{
+       if (sys.added_core_counters)
+               return true;
+
+       if (sys.added_thread_counters)
+               return true;
+
+       return sys.added_package_counters != 0;
+}
+
+/*
+ * Decide whether this run needs MSR access at all.
+ *
+ * Returns false when MSR access is disabled through no_msr. Otherwise returns
+ * true if has_added_counters() reports added counters, if any of the columns
+ * tested below is enabled, or if APERF/MPERF access is required but cannot be
+ * provided by perf.
+ */
+bool is_msr_access_required(void)
+{
+       if (no_msr)
+               return false;
+
+       if (has_added_counters())
+               return true;
+
+       /* Each column is tested exactly once. */
+       return BIC_IS_ENABLED(BIC_SMI)
+           || BIC_IS_ENABLED(BIC_CPU_c1)
+           || BIC_IS_ENABLED(BIC_CPU_c3)
+           || BIC_IS_ENABLED(BIC_CPU_c6)
+           || BIC_IS_ENABLED(BIC_CPU_c7)
+           || BIC_IS_ENABLED(BIC_Mod_c6)
+           || BIC_IS_ENABLED(BIC_CoreTmp)
+           || BIC_IS_ENABLED(BIC_Totl_c0)
+           || BIC_IS_ENABLED(BIC_Any_c0)
+           || BIC_IS_ENABLED(BIC_GFX_c0)
+           || BIC_IS_ENABLED(BIC_CPUGFX)
+           || BIC_IS_ENABLED(BIC_Pkgpc3)
+           || BIC_IS_ENABLED(BIC_Pkgpc6)
+           || BIC_IS_ENABLED(BIC_Pkgpc2)
+           || BIC_IS_ENABLED(BIC_Pkgpc7)
+           || BIC_IS_ENABLED(BIC_Pkgpc8)
+           || BIC_IS_ENABLED(BIC_Pkgpc9)
+           || BIC_IS_ENABLED(BIC_Pkgpc10)
+           /* TODO: Multiplex access with perf */
+           || BIC_IS_ENABLED(BIC_PkgWatt)
+           || BIC_IS_ENABLED(BIC_CorWatt)
+           || BIC_IS_ENABLED(BIC_GFXWatt)
+           || BIC_IS_ENABLED(BIC_RAMWatt)
+           || BIC_IS_ENABLED(BIC_Pkg_J)
+           || BIC_IS_ENABLED(BIC_Cor_J)
+           || BIC_IS_ENABLED(BIC_GFX_J)
+           || BIC_IS_ENABLED(BIC_RAM_J)
+           || BIC_IS_ENABLED(BIC_PKG__)
+           || BIC_IS_ENABLED(BIC_RAM__)
+           || BIC_IS_ENABLED(BIC_PkgTmp)
+           /* Kept last: it is the only term that calls into probing code. */
+           || (is_aperf_access_required() && !has_amperf_access_via_perf());
+}
+
+/*
+ * Settle whether MSRs are used for this run.
+ *
+ * MSR access is switched off (no_msr = 1) when nothing requires it, then the
+ * device and permission checks run, and finally, if MSR access ended up
+ * disabled, bic_disable_msr_access() is called.
+ */
+void check_msr_access(void)
+{
+       const bool msr_needed = is_msr_access_required();
+
+       if (!msr_needed)
+               no_msr = 1;
+
+       check_dev_msr();
+       check_msr_permission();
+
+       /*
+        * no_msr is re-read after the checks.
+        * NOTE(review): presumably the checks above may set it as well - confirm.
+        */
+       if (no_msr)
+               bic_disable_msr_access();
+}
+
+/*
+ * Clear from bic_enabled the columns whose perf-based prerequisites are not
+ * met.
+ */
+void check_perf_access(void)
+{
+       /*
+        * IPC is cleared when perf is disabled, when IPC is not enabled anyway,
+        * or when has_instr_count_access() fails.
+        */
+       if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access())
+               bic_enabled &= ~BIC_IPC;
+
+       /* Nothing more to clear when APERF/MPERF are required and accessible. */
+       if (is_aperf_access_required() && has_amperf_access())
+               return;
+
+       bic_enabled &= ~(BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_IPC);
+}
+
 void turbostat_init()
 {
        setup_all_buffers(true);
        set_base_cpu();
-       check_dev_msr();
-       check_permissions();
+       check_msr_access();
+       check_perf_access();
        process_cpuid();
        probe_pm_features();
+       set_amperf_source();
        linux_perf_init();
+       rapl_perf_init();
 
        for_all_cpus(get_cpu_type, ODD_COUNTERS);
        for_all_cpus(get_cpu_type, EVEN_COUNTERS);
 
        if (DO_BIC(BIC_IPC))
                (void)get_instr_count_fd(base_cpu);
+
+       /*
+        * If TSC tweak is needed, but couldn't get it,
+        * disable more BICs, since it can't be reported accurately.
+        */
+       if (platform->enable_tsc_tweak && !has_base_hz) {
+               bic_enabled &= ~BIC_Busy;
+               bic_enabled &= ~BIC_Bzy_MHz;
+       }
 }
 
 int fork_it(char **argv)
@@ -6259,7 +7497,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-       fprintf(outf, "turbostat version 2023.11.07 - Len Brown <lenb@kernel.org>\n");
+       fprintf(outf, "turbostat version 2024.04.08 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048
@@ -6291,6 +7529,9 @@ int add_counter(unsigned int msr_num, char *path, char *name,
 {
        struct msr_counter *msrp;
 
+       if (no_msr && msr_num)
+               errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num);
+
        msrp = calloc(1, sizeof(struct msr_counter));
        if (msrp == NULL) {
                perror("calloc");
@@ -6595,6 +7836,8 @@ void cmdline(int argc, char **argv)
                { "list", no_argument, 0, 'l' },
                { "out", required_argument, 0, 'o' },
                { "quiet", no_argument, 0, 'q' },
+               { "no-msr", no_argument, 0, 'M' },
+               { "no-perf", no_argument, 0, 'P' },
                { "show", required_argument, 0, 's' },
                { "Summary", no_argument, 0, 'S' },
                { "TCC", required_argument, 0, 'T' },
@@ -6604,7 +7847,25 @@ void cmdline(int argc, char **argv)
 
        progname = argv[0];
 
-       while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
+       /*
+        * Parse some options early, because they may make other options invalid,
+        * like adding the MSR counter with --add and at the same time using --no-msr.
+        */
+       while ((opt = getopt_long_only(argc, argv, "MP", long_options, &option_index)) != -1) {
+               switch (opt) {
+               case 'M':
+                       no_msr = 1;
+                       break;
+               case 'P':
+                       no_perf = 1;
+                       break;
+               default:
+                       break;
+               }
+       }
+       optind = 0;
+
+       while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qMST:v", long_options, &option_index)) != -1) {
                switch (opt) {
                case 'a':
                        parse_add_command(optarg);
@@ -6662,6 +7923,10 @@ void cmdline(int argc, char **argv)
                case 'q':
                        quiet = 1;
                        break;
+               case 'M':
+               case 'P':
+                       /* Parsed earlier */
+                       break;
                case 'n':
                        num_iterations = strtod(optarg, NULL);
 
@@ -6704,6 +7969,22 @@ void cmdline(int argc, char **argv)
        }
 }
 
+/*
+ * Raise the RLIMIT_NOFILE soft and hard limits to at least MAX_NOFILE.
+ *
+ * Limits that are already at or above MAX_NOFILE are written back unchanged.
+ * Exits through err() when the limits cannot be read or set.
+ */
+void set_rlimit(void)
+{
+       struct rlimit nofile;
+
+       if (getrlimit(RLIMIT_NOFILE, &nofile) < 0)
+               err(1, "Failed to get rlimit");
+
+       nofile.rlim_max = nofile.rlim_max < MAX_NOFILE ? MAX_NOFILE : nofile.rlim_max;
+       nofile.rlim_cur = nofile.rlim_cur < MAX_NOFILE ? MAX_NOFILE : nofile.rlim_cur;
+
+       if (setrlimit(RLIMIT_NOFILE, &nofile) < 0)
+               err(1, "Failed to set rlimit");
+}
+
 int main(int argc, char **argv)
 {
        int fd, ret;
@@ -6729,9 +8010,13 @@ skip_cgroup_setting:
 
        probe_sysfs();
 
+       if (!getuid())
+               set_rlimit();
+
        turbostat_init();
 
-       msr_sum_record();
+       if (!no_msr)
+               msr_sum_record();
 
        /* dump counters and exit */
        if (dump_only)
index 908e0d0839369c2e41f090bddc2e9a9b9121b4c9..61c69297e7978fceed700be3ad43a7a870d20de2 100644 (file)
@@ -986,10 +986,12 @@ static void dpa_perf_setup(struct cxl_port *endpoint, struct range *range,
 {
        dpa_perf->qos_class = FAKE_QTG_ID;
        dpa_perf->dpa_range = *range;
-       dpa_perf->coord.read_latency = 500;
-       dpa_perf->coord.write_latency = 500;
-       dpa_perf->coord.read_bandwidth = 1000;
-       dpa_perf->coord.write_bandwidth = 1000;
+       for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
+               dpa_perf->coord[i].read_latency = 500;
+               dpa_perf->coord[i].write_latency = 500;
+               dpa_perf->coord[i].read_bandwidth = 1000;
+               dpa_perf->coord[i].write_bandwidth = 1000;
+       }
 }
 
 static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
index b1ede624986676a554514105936698fdd2b0a915..b7c8f29c09a978895c1176e1a39aeda8c97e8416 100644 (file)
@@ -18,7 +18,7 @@ echo 'sched:*' > set_event
 
 yield
 
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
     fail "at least fork, exec and exit events should be recorded"
 fi
@@ -29,7 +29,7 @@ echo 1 > events/sched/enable
 
 yield
 
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -lt 3 ]; then
     fail "at least fork, exec and exit events should be recorded"
 fi
@@ -40,7 +40,7 @@ echo 0 > events/sched/enable
 
 yield
 
-count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
 if [ $count -ne 0 ]; then
     fail "any of scheduler events should not be recorded"
 fi
index 541bf192e30e6bcec377908643d41d1d1dbf765a..14bbab0cce13521abbcae9bbd3772a567239c77f 100644 (file)
@@ -51,6 +51,7 @@
 #include <stdarg.h>
 #include <string.h>
 #include <stdio.h>
+#include <sys/utsname.h>
 #endif
 
 #ifndef ARRAY_SIZE
@@ -79,6 +80,9 @@
 #define KSFT_XPASS 3
 #define KSFT_SKIP  4
 
+#ifndef __noreturn
+#define __noreturn       __attribute__((__noreturn__))
+#endif
 #define __printf(a, b)   __attribute__((format(printf, a, b)))
 
 /* counters */
@@ -288,24 +292,26 @@ void ksft_test_result_code(int exit_code, const char *test_name,
        }
 
        /* Docs seem to call for double space if directive is absent */
-       if (!directive[0] && msg[0])
+       if (!directive[0] && msg)
                directive = " #  ";
 
-       va_start(args, msg);
        printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive);
        errno = saved_errno;
-       vprintf(msg, args);
+       if (msg) {
+               va_start(args, msg);
+               vprintf(msg, args);
+               va_end(args);
+       }
        printf("\n");
-       va_end(args);
 }
 
-static inline int ksft_exit_pass(void)
+static inline __noreturn int ksft_exit_pass(void)
 {
        ksft_print_cnts();
        exit(KSFT_PASS);
 }
 
-static inline int ksft_exit_fail(void)
+static inline __noreturn int ksft_exit_fail(void)
 {
        ksft_print_cnts();
        exit(KSFT_FAIL);
@@ -332,7 +338,7 @@ static inline int ksft_exit_fail(void)
                  ksft_cnt.ksft_xfail + \
                  ksft_cnt.ksft_xskip)
 
-static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
 {
        int saved_errno = errno;
        va_list args;
@@ -347,19 +353,19 @@ static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
        exit(KSFT_FAIL);
 }
 
-static inline int ksft_exit_xfail(void)
+static inline __noreturn int ksft_exit_xfail(void)
 {
        ksft_print_cnts();
        exit(KSFT_XFAIL);
 }
 
-static inline int ksft_exit_xpass(void)
+static inline __noreturn int ksft_exit_xpass(void)
 {
        ksft_print_cnts();
        exit(KSFT_XPASS);
 }
 
-static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
 {
        int saved_errno = errno;
        va_list args;
@@ -388,4 +394,21 @@ static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
        exit(KSFT_SKIP);
 }
 
+/*
+ * Check whether the running kernel is at least version min_major.min_minor.
+ *
+ * Returns non-zero when the release reported by uname() is new enough, zero
+ * otherwise. Under NOLIBC the check is not available: a message is printed
+ * and zero is returned. If the release string cannot be obtained or parsed,
+ * the test exits through ksft_exit_fail_msg().
+ */
+static inline int ksft_min_kernel_version(unsigned int min_major,
+                                         unsigned int min_minor)
+{
+#ifdef NOLIBC
+       ksft_print_msg("NOLIBC: Can't check kernel version: Function not implemented\n");
+       return 0;
+#else
+       struct utsname uts;
+       unsigned int cur_major, cur_minor;
+
+       if (uname(&uts) || sscanf(uts.release, "%u.%u.", &cur_major, &cur_minor) != 2)
+               ksft_exit_fail_msg("Can't parse kernel version\n");
+
+       /* The major number decides unless it is equal; then the minor does. */
+       if (cur_major != min_major)
+               return cur_major > min_major;
+
+       return cur_minor >= min_minor;
+#endif
+}
+
 #endif /* __KSELFTEST_H */
index 4fd735e48ee7eea99702fcb3e27f539c887b15bc..ba3ddeda24bf527295acaf32159097c03ac52153 100644 (file)
                FIXTURE_DATA(fixture_name) self; \
                pid_t child = 1; \
                int status = 0; \
+               bool jmp = false; \
                memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
                if (setjmp(_metadata->env) == 0) { \
                        /* Use the same _metadata. */ \
                                _metadata->exit_code = KSFT_FAIL; \
                        } \
                } \
+               else \
+                       jmp = true; \
                if (child == 0) { \
-                       if (_metadata->setup_completed && !_metadata->teardown_parent) \
+                       if (_metadata->setup_completed && !_metadata->teardown_parent && !jmp) \
                                fixture_name##_teardown(_metadata, &self, variant->data); \
                        _exit(0); \
                } \
@@ -1202,7 +1205,7 @@ void __run_test(struct __fixture_metadata *f,
                diagnostic = "unknown";
 
        ksft_test_result_code(t->exit_code, test_name,
-                             diagnostic ? "%s" : "", diagnostic);
+                             diagnostic ? "%s" : NULL, diagnostic);
 }
 
 static int test_harness_run(int argc, char **argv)
index 2fb6dd8adba6945d0000c19fe90da1002d8c0dd4..8b984fa042869e595507368541504f0b04d42014 100644 (file)
@@ -86,7 +86,7 @@ static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line)
 
        pos = strchr(line, ' ') + 1;
 
-       if (fscanf(fnetstat, type->header_name) == EOF)
+       if (fscanf(fnetstat, "%[^ :]", type->header_name) == EOF)
                test_error("fscanf(%s)", type->header_name);
        if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':')
                test_error("Unexpected netstat format (%c)", tmp);
index 92276f916f2f30d080ba3e1f5521c492192f8e98..e408b9243b2c5a5cf66785518fbfc16f2682b169 100644 (file)
@@ -17,37 +17,37 @@ static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
 void __test_msg(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_print_msg(buf);
+       ksft_print_msg("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_ok(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_pass(buf);
+       ksft_test_result_pass("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_fail(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_fail(buf);
+       ksft_test_result_fail("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_xfail(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_xfail(buf);
+       ksft_test_result_xfail("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_error(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_error(buf);
+       ksft_test_result_error("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 void __test_skip(const char *buf)
 {
        pthread_mutex_lock(&ksft_print_lock);
-       ksft_test_result_skip(buf);
+       ksft_test_result_skip("%s", buf);
        pthread_mutex_unlock(&ksft_print_lock);
 }
 
index 7df8b8700e39e96292f8eafdf105ee0314a65497..a2fe88d35ac06e4f534bd4d452670528d9f77219 100644 (file)
@@ -256,8 +256,6 @@ static int test_wait_fds(int sk[], size_t nr, bool is_writable[],
 
 static void test_client_active_rst(unsigned int port)
 {
-       /* one in queue, another accept()ed */
-       unsigned int wait_for = backlog + 2;
        int i, sk[3], err;
        bool is_writable[ARRAY_SIZE(sk)] = {false};
        unsigned int last = ARRAY_SIZE(sk) - 1;
@@ -275,16 +273,20 @@ static void test_client_active_rst(unsigned int port)
        for (i = 0; i < last; i++) {
                err = _test_connect_socket(sk[i], this_ip_dest, port,
                                               (i == 0) ? TEST_TIMEOUT_SEC : -1);
-
                if (err < 0)
                        test_error("failed to connect()");
        }
 
-       synchronize_threads(); /* 2: connection accept()ed, another queued */
-       err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC);
+       synchronize_threads(); /* 2: two connections: one accept()ed, another queued */
+       err = test_wait_fds(sk, last, is_writable, last, TEST_TIMEOUT_SEC);
        if (err < 0)
                test_error("test_wait_fds(): %d", err);
 
+       /* async connect() with third sk to get into request_sock_queue */
+       err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+       if (err < 0)
+               test_error("failed to connect()");
+
        synchronize_threads(); /* 3: close listen socket */
        if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
                test_fail("Failed to send data on connected socket");
@@ -292,13 +294,14 @@ static void test_client_active_rst(unsigned int port)
                test_ok("Verified established tcp connection");
 
        synchronize_threads(); /* 4: finishing up */
-       err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
-       if (err < 0)
-               test_error("failed to connect()");
 
        synchronize_threads(); /* 5: closed active sk */
-       err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL,
-                           wait_for, TEST_TIMEOUT_SEC);
+       /*
+        * Wait for 2 connections: one accepted, another in the accept queue,
+        * the one in request_sock_queue won't get fully established, so
+        * doesn't receive an active RST, see inet_csk_listen_stop().
+        */
+       err = test_wait_fds(sk, last, NULL, last, TEST_TIMEOUT_SEC);
        if (err < 0)
                test_error("select(): %d", err);
 
index 452de131fa3a9c720cd1fc4b9dc24438fd01d15d..517930f9721bd9b062d178def9fb296c17353119 100644 (file)
@@ -21,7 +21,7 @@ static void make_listen(int sk)
 static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
                                const char *tst)
 {
-       struct tcp_ao_info_opt tmp;
+       struct tcp_ao_info_opt tmp = {};
        socklen_t len = sizeof(tmp);
 
        if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len))
index 1d975bf52af33908593f61894233f9d8560cb16f..85b3baa3f7f34112ea95239c8819a2b1d834e22a 100644 (file)
@@ -34,7 +34,7 @@
 #endif
 
 #ifndef UDP_MAX_SEGMENTS
-#define UDP_MAX_SEGMENTS       (1 << 6UL)
+#define UDP_MAX_SEGMENTS       (1 << 7UL)
 #endif
 
 #define CONST_MTU_TEST 1500
index d49dd3ffd0d96abeaa38cd92f3040ef747726541..c001dd79179d5d28e51d69cbad4c7e9a6a026053 100644 (file)
@@ -66,7 +66,7 @@ static int check_diff(struct timeval start, struct timeval end)
        diff = end.tv_usec - start.tv_usec;
        diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
 
-       if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+       if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
                printf("Diff too high: %lld..", diff);
                return -1;
        }
@@ -184,80 +184,71 @@ static int check_timer_create(int which)
        return 0;
 }
 
-int remain;
-__thread int got_signal;
+static pthread_t ctd_thread;
+static volatile int ctd_count, ctd_failed;
 
-static void *distribution_thread(void *arg)
+static void ctd_sighandler(int sig)
 {
-       while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
-       return NULL;
+       if (pthread_self() != ctd_thread)
+               ctd_failed = 1;
+       ctd_count--;
 }
 
-static void distribution_handler(int nr)
+static void *ctd_thread_func(void *arg)
 {
-       if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
-               __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
-}
-
-/*
- * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
- * timer signals. This primarily tests that the kernel does not favour any one.
- */
-static int check_timer_distribution(void)
-{
-       int err, i;
-       timer_t id;
-       const int nthreads = 10;
-       pthread_t threads[nthreads];
        struct itimerspec val = {
                .it_value.tv_sec = 0,
                .it_value.tv_nsec = 1000 * 1000,
                .it_interval.tv_sec = 0,
                .it_interval.tv_nsec = 1000 * 1000,
        };
+       timer_t id;
 
-       remain = nthreads + 1;  /* worker threads + this thread */
-       signal(SIGALRM, distribution_handler);
-       err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
-       if (err < 0) {
-               ksft_perror("Can't create timer");
-               return -1;
-       }
-       err = timer_settime(id, 0, &val, NULL);
-       if (err < 0) {
-               ksft_perror("Can't set timer");
-               return -1;
-       }
+       /* 1/10 seconds to ensure the leader sleeps */
+       usleep(10000);
 
-       for (i = 0; i < nthreads; i++) {
-               err = pthread_create(&threads[i], NULL, distribution_thread,
-                                    NULL);
-               if (err) {
-                       ksft_print_msg("Can't create thread: %s (%d)\n",
-                                      strerror(errno), errno);
-                       return -1;
-               }
-       }
+       ctd_count = 100;
+       if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
+               return "Can't create timer\n";
+       if (timer_settime(id, 0, &val, NULL))
+               return "Can't set timer\n";
 
-       /* Wait for all threads to receive the signal. */
-       while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+       while (ctd_count > 0 && !ctd_failed)
+               ;
 
-       for (i = 0; i < nthreads; i++) {
-               err = pthread_join(threads[i], NULL);
-               if (err) {
-                       ksft_print_msg("Can't join thread: %s (%d)\n",
-                                      strerror(errno), errno);
-                       return -1;
-               }
-       }
+       if (timer_delete(id))
+               return "Can't delete timer\n";
 
-       if (timer_delete(id)) {
-               ksft_perror("Can't delete timer");
-               return -1;
-       }
+       return NULL;
+}
+
+/*
+ * Test that only the running thread receives the timer signal.
+ */
+static int check_timer_distribution(void)
+{
+       const char *errmsg;
 
-       ksft_test_result_pass("check_timer_distribution\n");
+       signal(SIGALRM, ctd_sighandler);
+
+       errmsg = "Can't create thread\n";
+       if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
+               goto err;
+
+       errmsg = "Can't join thread\n";
+       if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
+               goto err;
+
+       if (!ctd_failed)
+               ksft_test_result_pass("check signal distribution\n");
+       else if (ksft_min_kernel_version(6, 3))
+               ksft_test_result_fail("check signal distribution\n");
+       else
+               ksft_test_result_skip("check signal distribution (old kernel)\n");
        return 0;
+err:
+       ksft_print_msg("%s", errmsg);
+       return -1;
 }
 
 int main(int argc, char **argv)
index 48b9a803235a80413f0d94d9eb841d9f045779e8..d13ebde203221ae3fa81835fae684c8e180cf111 100644 (file)
@@ -21,9 +21,6 @@
  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *   GNU General Public License for more details.
  */
-
-
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <time.h>
@@ -62,45 +59,47 @@ int clear_time_state(void)
 #define NUM_FREQ_OUTOFRANGE 4
 #define NUM_FREQ_INVALID 2
 
+#define SHIFTED_PPM (1 << 16)
+
 long valid_freq[NUM_FREQ_VALID] = {
-       -499<<16,
-       -450<<16,
-       -400<<16,
-       -350<<16,
-       -300<<16,
-       -250<<16,
-       -200<<16,
-       -150<<16,
-       -100<<16,
-       -75<<16,
-       -50<<16,
-       -25<<16,
-       -10<<16,
-       -5<<16,
-       -1<<16,
+        -499 * SHIFTED_PPM,
+        -450 * SHIFTED_PPM,
+        -400 * SHIFTED_PPM,
+        -350 * SHIFTED_PPM,
+        -300 * SHIFTED_PPM,
+        -250 * SHIFTED_PPM,
+        -200 * SHIFTED_PPM,
+        -150 * SHIFTED_PPM,
+        -100 * SHIFTED_PPM,
+         -75 * SHIFTED_PPM,
+         -50 * SHIFTED_PPM,
+         -25 * SHIFTED_PPM,
+         -10 * SHIFTED_PPM,
+          -5 * SHIFTED_PPM,
+          -1 * SHIFTED_PPM,
        -1000,
-       1<<16,
-       5<<16,
-       10<<16,
-       25<<16,
-       50<<16,
-       75<<16,
-       100<<16,
-       150<<16,
-       200<<16,
-       250<<16,
-       300<<16,
-       350<<16,
-       400<<16,
-       450<<16,
-       499<<16,
+           1 * SHIFTED_PPM,
+           5 * SHIFTED_PPM,
+          10 * SHIFTED_PPM,
+          25 * SHIFTED_PPM,
+          50 * SHIFTED_PPM,
+          75 * SHIFTED_PPM,
+         100 * SHIFTED_PPM,
+         150 * SHIFTED_PPM,
+         200 * SHIFTED_PPM,
+         250 * SHIFTED_PPM,
+         300 * SHIFTED_PPM,
+         350 * SHIFTED_PPM,
+         400 * SHIFTED_PPM,
+         450 * SHIFTED_PPM,
+         499 * SHIFTED_PPM,
 };
 
 long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
-       -1000<<16,
-       -550<<16,
-       550<<16,
-       1000<<16,
+       -1000 * SHIFTED_PPM,
+        -550 * SHIFTED_PPM,
+         550 * SHIFTED_PPM,
+        1000 * SHIFTED_PPM,
 };
 
 #define LONG_MAX (~0UL>>1)
diff --git a/tools/testing/selftests/turbostat/defcolumns.py b/tools/testing/selftests/turbostat/defcolumns.py
new file mode 100755 (executable)
index 0000000..d9b0420
--- /dev/null
@@ -0,0 +1,60 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+from shutil import which
+
+turbostat = which('turbostat')
+if turbostat is None:
+       print('Could not find turbostat binary')
+       exit(1)
+
+timeout = which('timeout')
+if timeout is None:
+       print('Could not find timeout binary')
+       exit(1)
+
+proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True)
+if proc_turbostat.returncode != 0:
+       print(f'turbostat failed with {proc_turbostat.returncode}')
+       exit(1)
+
+#
+# By default --list reports also "usec" and "Time_Of_Day_Seconds" columns
+# which are only visible when running with --debug.
+#
+expected_columns_debug = proc_turbostat.stdout.replace(b',', b'\t').strip()
+expected_columns = expected_columns_debug.replace(b'usec\t', b'').replace(b'Time_Of_Day_Seconds\t', b'').replace(b'X2APIC\t', b'').replace(b'APIC\t', b'')
+
+#
+# Run turbostat with no options for 10 seconds and send SIGINT
+#
+timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '1s']
+turbostat_argv = [turbostat, '-i', '0.250']
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+       print(f'turbostat failed with {proc_turbostat.returncode}')
+       exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns != actual_columns:
+       print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}')
+       exit(1)
+print('OK')
+
+#
+# Same, but with --debug
+#
+turbostat_argv.append('--debug')
+
+print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+       print(f'turbostat failed with {proc_turbostat.returncode}')
+       exit(1)
+actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+if expected_columns_debug != actual_columns:
+       print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}')
+       exit(1)
+print('OK')