Merge tag 'pm-6.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 5 Apr 2024 19:55:40 +0000 (12:55 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 5 Apr 2024 19:55:40 +0000 (12:55 -0700)
Pull power management fix from Rafael Wysocki:
 "Fix a recent Energy Model change that went against a recent scheduler
  change made independently (Vincent Guittot)"

* tag 'pm-6.9-rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  PM: EM: fix wrong utilization estimation in em_cpu_energy()

316 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/mm/zswap.rst
Documentation/dev-tools/testing-overview.rst
Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
Documentation/networking/devlink/devlink-eswitch-attr.rst [new file with mode: 0644]
Documentation/networking/devlink/index.rst
Documentation/networking/representors.rst
Documentation/virt/kvm/x86/amd-memory-encryption.rst
Documentation/virt/kvm/x86/msr.rst
MAINTAINERS
arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi
arch/arm64/kernel/head.S
arch/arm64/kvm/arm.c
arch/arm64/kvm/hyp/nvhe/tlb.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vhe/tlb.c
arch/arm64/kvm/mmu.c
arch/riscv/kvm/aia_aplic.c
arch/riscv/kvm/vcpu_onereg.c
arch/x86/include/asm/alternative.h
arch/x86/include/asm/cpufeature.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/include/uapi/asm/kvm_para.h
arch/x86/kernel/callthunks.c
arch/x86/kernel/kvm.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/reverse_cpuid.h
arch/x86/kvm/svm/sev.c
arch/x86/kvm/trace.h
arch/x86/lib/retpoline.S
arch/x86/net/bpf_jit_comp.c
block/bdev.c
drivers/acpi/thermal.c
drivers/ata/ahci_st.c
drivers/ata/pata_macio.c
drivers/ata/sata_gemini.c
drivers/ata/sata_mv.c
drivers/ata/sata_sx4.c
drivers/bluetooth/btqca.c
drivers/bluetooth/hci_qca.c
drivers/gpio/gpiolib-cdev.c
drivers/gpio/gpiolib.c
drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
drivers/gpu/drm/drm_prime.c
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/display/intel_display_device.h
drivers/gpu/drm/i915/display/intel_display_types.h
drivers/gpu/drm/i915/display/intel_dp.c
drivers/gpu/drm/i915/display/intel_dp_mst.c
drivers/gpu/drm/i915/display/intel_psr.c
drivers/gpu/drm/i915/gt/gen8_ppgtt.c
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_gt.c
drivers/gpu/drm/i915/gt/intel_gt.h
drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h [new file with mode: 0644]
drivers/gpu/drm/i915/gt/intel_gt_regs.h
drivers/gpu/drm/i915/gt/intel_workarounds.c
drivers/gpu/drm/nouveau/nouveau_uvmm.c
drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
drivers/gpu/drm/panfrost/panfrost_gpu.c
drivers/gpu/drm/xe/xe_device.c
drivers/gpu/drm/xe/xe_device_types.h
drivers/gpu/drm/xe/xe_exec.c
drivers/gpu/drm/xe/xe_exec_queue_types.h
drivers/gpu/drm/xe/xe_gt_pagefault.c
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
drivers/gpu/drm/xe/xe_gt_types.h
drivers/gpu/drm/xe/xe_preempt_fence.c
drivers/gpu/drm/xe/xe_pt.c
drivers/gpu/drm/xe/xe_ring_ops.c
drivers/gpu/drm/xe/xe_sched_job.c
drivers/gpu/drm/xe/xe_sched_job_types.h
drivers/gpu/drm/xe/xe_vm.c
drivers/gpu/drm/xe/xe_vm.h
drivers/gpu/drm/xe/xe_vm_types.h
drivers/mtd/devices/block2mtd.c
drivers/net/dsa/mv88e6xxx/chip.c
drivers/net/dsa/sja1105/sja1105_mdio.c
drivers/net/ethernet/broadcom/genet/bcmgenet.c
drivers/net/ethernet/freescale/fec_main.c
drivers/net/ethernet/intel/e1000e/hw.h
drivers/net/ethernet/intel/e1000e/ich8lan.c
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/e1000e/phy.c
drivers/net/ethernet/intel/e1000e/phy.h
drivers/net/ethernet/intel/i40e/i40e.h
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_register.h
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_txrx.h
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
drivers/net/ethernet/intel/ice/ice_common.c
drivers/net/ethernet/intel/ice/ice_ethtool.c
drivers/net/ethernet/intel/ice/ice_vf_vsi_vlan_ops.c
drivers/net/ethernet/intel/idpf/idpf_txrx.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c
drivers/net/ethernet/microsoft/mana/mana_en.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/ravb_main.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
drivers/net/ethernet/wangxun/txgbe/txgbe_phy.c
drivers/net/phy/micrel.c
drivers/net/usb/ax88179_178a.c
drivers/net/xen-netfront.c
drivers/pwm/core.c
drivers/thermal/gov_power_allocator.c
fs/aio.c
fs/bcachefs/Makefile
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_foreground.c
fs/bcachefs/alloc_types.h
fs/bcachefs/backpointers.c
fs/bcachefs/backpointers.h
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/bset.c
fs/bcachefs/bset.h
fs/bcachefs/btree_cache.c
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_io.c
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_journal_iter.c
fs/bcachefs/btree_journal_iter.h
fs/bcachefs/btree_node_scan.c [new file with mode: 0644]
fs/bcachefs/btree_node_scan.h [new file with mode: 0644]
fs/bcachefs/btree_node_scan_types.h [new file with mode: 0644]
fs/bcachefs/btree_trans_commit.c
fs/bcachefs/btree_update.c
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/btree_write_buffer.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/chardev.c
fs/bcachefs/data_update.c
fs/bcachefs/errcode.h
fs/bcachefs/error.c
fs/bcachefs/error.h
fs/bcachefs/extents.c
fs/bcachefs/extents.h
fs/bcachefs/eytzinger.c [new file with mode: 0644]
fs/bcachefs/eytzinger.h
fs/bcachefs/fs-io-direct.c
fs/bcachefs/fs.c
fs/bcachefs/fsck.c
fs/bcachefs/inode.c
fs/bcachefs/io_misc.c
fs/bcachefs/journal_seq_blacklist.c
fs/bcachefs/logged_ops.c
fs/bcachefs/mean_and_variance_test.c
fs/bcachefs/opts.c
fs/bcachefs/opts.h
fs/bcachefs/recovery.c
fs/bcachefs/recovery.h
fs/bcachefs/recovery_passes.c [new file with mode: 0644]
fs/bcachefs/recovery_passes.h [new file with mode: 0644]
fs/bcachefs/recovery_passes_types.h [moved from fs/bcachefs/recovery_types.h with 91% similarity]
fs/bcachefs/reflink.c
fs/bcachefs/replicas.c
fs/bcachefs/sb-downgrade.c
fs/bcachefs/sb-errors_types.h
fs/bcachefs/snapshot.c
fs/bcachefs/snapshot.h
fs/bcachefs/subvolume.c
fs/bcachefs/subvolume.h
fs/bcachefs/subvolume_types.h
fs/bcachefs/super-io.c
fs/bcachefs/super.c
fs/bcachefs/util.c
fs/bcachefs/util.h
fs/cramfs/inode.c
fs/ext4/super.c
fs/f2fs/super.c
fs/jfs/jfs_logmgr.c
fs/namei.c
fs/reiserfs/journal.c
fs/romfs/super.c
fs/smb/server/ksmbd_netlink.h
fs/smb/server/mgmt/share_config.c
fs/smb/server/smb2ops.c
fs/smb/server/smb2pdu.c
fs/smb/server/transport_ipc.c
fs/super.c
fs/vboxsf/file.c
fs/vboxsf/super.c
fs/vboxsf/utils.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_super.c
include/kvm/arm_pmu.h
include/linux/blkdev.h
include/linux/bpf.h
include/linux/fs.h
include/linux/udp.h
include/net/bluetooth/hci.h
include/net/mana/mana.h
include/sound/hdaudio_ext.h
include/sound/tas2781-tlv.h
kernel/bpf/syscall.c
kernel/bpf/verifier.c
kernel/trace/bpf_trace.c
net/9p/client.c
net/9p/trans_fd.c
net/ax25/ax25_dev.c
net/bluetooth/hci_core.c
net/bluetooth/hci_debugfs.c
net/bluetooth/hci_event.c
net/bluetooth/hci_sync.c
net/bridge/netfilter/ebtables.c
net/core/dev.c
net/core/gro.c
net/core/sock_map.c
net/hsr/hsr_device.c
net/ipv4/inet_connection_sock.c
net/ipv4/ip_gre.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/udp.c
net/ipv4/udp_offload.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_gre.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/udp.c
net/ipv6/udp_offload.c
net/mptcp/protocol.c
net/mptcp/sockopt.c
net/mptcp/subflow.c
net/netfilter/nf_tables_api.c
net/rds/rdma.c
net/sched/act_skbmod.c
net/sched/sch_api.c
net/vmw_vsock/virtio_transport.c
scripts/kernel-doc
security/security.c
security/selinux/selinuxfs.c
sound/oss/dmasound/dmasound_paula.c
sound/pci/emu10k1/emu10k1_callback.c
sound/pci/hda/cs35l41_hda_property.c
sound/pci/hda/cs35l56_hda_i2c.c
sound/pci/hda/cs35l56_hda_spi.c
sound/pci/hda/patch_realtek.c
sound/soc/amd/acp/acp-pci.c
sound/soc/codecs/cs-amp-lib.c
sound/soc/codecs/cs42l43.c
sound/soc/codecs/es8326.c
sound/soc/codecs/es8326.h
sound/soc/codecs/rt1316-sdw.c
sound/soc/codecs/rt1318-sdw.c
sound/soc/codecs/rt5682-sdw.c
sound/soc/codecs/rt700.c
sound/soc/codecs/rt711-sdca-sdw.c
sound/soc/codecs/rt711-sdca.c
sound/soc/codecs/rt711-sdw.c
sound/soc/codecs/rt711.c
sound/soc/codecs/rt712-sdca-dmic.c
sound/soc/codecs/rt712-sdca-sdw.c
sound/soc/codecs/rt712-sdca.c
sound/soc/codecs/rt715-sdca-sdw.c
sound/soc/codecs/rt715-sdca.c
sound/soc/codecs/rt715-sdw.c
sound/soc/codecs/rt715.c
sound/soc/codecs/rt722-sdca-sdw.c
sound/soc/codecs/rt722-sdca.c
sound/soc/codecs/wm_adsp.c
sound/soc/intel/avs/boards/da7219.c
sound/soc/intel/avs/boards/dmic.c
sound/soc/intel/avs/boards/es8336.c
sound/soc/intel/avs/boards/i2s_test.c
sound/soc/intel/avs/boards/max98357a.c
sound/soc/intel/avs/boards/max98373.c
sound/soc/intel/avs/boards/max98927.c
sound/soc/intel/avs/boards/nau8825.c
sound/soc/intel/avs/boards/probe.c
sound/soc/intel/avs/boards/rt274.c
sound/soc/intel/avs/boards/rt286.c
sound/soc/intel/avs/boards/rt298.c
sound/soc/intel/avs/boards/rt5514.c
sound/soc/intel/avs/boards/rt5663.c
sound/soc/intel/avs/boards/rt5682.c
sound/soc/intel/avs/boards/ssm4567.c
sound/soc/soc-ops.c
sound/soc/sof/amd/acp.c
sound/soc/sof/core.c
sound/soc/sof/intel/hda-common-ops.c
sound/soc/sof/intel/hda-dai-ops.c
sound/soc/sof/intel/hda-dsp.c
sound/soc/sof/intel/hda-pcm.c
sound/soc/sof/intel/hda-stream.c
sound/soc/sof/intel/hda.h
sound/soc/sof/intel/lnl.c
sound/soc/sof/intel/mtl.c
sound/soc/sof/intel/mtl.h
sound/soc/sof/ipc4-mtrace.c
sound/soc/sof/ipc4-pcm.c
sound/soc/sof/ipc4-priv.h
sound/soc/sof/ipc4-topology.c
sound/soc/sof/ops.h
sound/soc/sof/pcm.c
sound/soc/sof/sof-audio.h
sound/soc/sof/sof-priv.h
sound/usb/line6/driver.c
tools/include/linux/btf_ids.h
tools/testing/selftests/kvm/aarch64/arch_timer.c
tools/testing/selftests/kvm/include/x86_64/processor.h
tools/testing/selftests/kvm/riscv/arch_timer.c
tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
tools/testing/selftests/net/bind_wildcard.c
tools/testing/selftests/net/mptcp/mptcp_connect.sh
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/reuseaddr_conflict.c
tools/testing/selftests/net/udpgro_fwd.sh

index bb884c14b2f679dba3a36ba89755a1eca2fe6db2..623fce7d5fcd0c4392432908e21aaba5134e3aa0 100644 (file)
                        To turn off having tracepoints sent to printk,
                         echo 0 > /proc/sys/kernel/tracepoint_printk
                        Note, echoing 1 into this file without the
-                       tracepoint_printk kernel cmdline option has no effect.
+                       tp_printk kernel cmdline option has no effect.
 
                        The tp_printk_stop_on_boot (see below) can also be used
                        to stop the printing of events to console at
index b42132969e31570986f4d60eaf3e550c6365ab2a..13632671adaeaa91ac65499df7b9682925ee316a 100644 (file)
@@ -155,7 +155,7 @@ Setting this parameter to 100 will disable the hysteresis.
 
 Some users cannot tolerate the swapping that comes with zswap store failures
 and zswap writebacks. Swapping can be disabled entirely (without disabling
-zswap itself) on a cgroup-basis as follows:
+zswap itself) on a cgroup-basis as follows::
 
        echo 0 > /sys/fs/cgroup/<cgroup-name>/memory.zswap.writeback
 
@@ -166,7 +166,7 @@ writeback (because the same pages might be rejected again and again).
 When there is a sizable amount of cold memory residing in the zswap pool, it
 can be advantageous to proactively write these cold pages to swap and reclaim
 the memory for other use cases. By default, the zswap shrinker is disabled.
-User can enable it as follows:
+User can enable it as follows::
 
   echo Y > /sys/module/zswap/parameters/shrinker_enabled
 
index 0aaf6ea53608fc67c0a9bc52b5342c0ce95be90b..1619e5e5cc9c4bae4ccdb043d5dc45489422fe42 100644 (file)
@@ -104,6 +104,8 @@ Some of these tools are listed below:
   KASAN and can be used in production. See Documentation/dev-tools/kfence.rst
 * lockdep is a locking correctness validator. See
   Documentation/locking/lockdep-design.rst
+* Runtime Verification (RV) supports checking specific behaviours for a given
+  subsystem. See Documentation/trace/rv/runtime-verification.rst
 * There are several other pieces of debug instrumentation in the kernel, many
   of which can be found in lib/Kconfig.debug
 
index 528ef3572b621e75ee6cadfe7e8f82652f54476d..055a3351880bc16d0df6e0f8636ea3f1a47360a4 100644 (file)
@@ -94,6 +94,10 @@ properties:
 
   local-bd-address: true
 
+  qcom,local-bd-address-broken:
+    type: boolean
+    description:
+      boot firmware is incorrectly passing the address in big-endian order
 
 required:
   - compatible
diff --git a/Documentation/networking/devlink/devlink-eswitch-attr.rst b/Documentation/networking/devlink/devlink-eswitch-attr.rst
new file mode 100644 (file)
index 0000000..08bb39a
--- /dev/null
@@ -0,0 +1,76 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Devlink E-Switch Attribute
+==========================
+
+Devlink E-Switch supports two modes of operation: legacy and switchdev.
+Legacy mode operates based on traditional MAC/VLAN steering rules. Switching
+decisions are made based on MAC addresses, VLANs, etc. There is limited ability
+to offload switching rules to hardware.
+
+On the other hand, switchdev mode allows for more advanced offloading
+capabilities of the E-Switch to hardware. In switchdev mode, more switching
+rules and logic can be offloaded to the hardware switch ASIC. It enables
+representor netdevices that represent the slow path of virtual functions (VFs)
+or scalable-functions (SFs) of the device. See more information about
+:ref:`Documentation/networking/switchdev.rst <switchdev>` and
+:ref:`Documentation/networking/representors.rst <representors>`.
+
+In addition, the devlink E-Switch also comes with other attributes listed
+in the following section.
+
+Attributes Description
+======================
+
+The following is a list of E-Switch attributes.
+
+.. list-table:: E-Switch attributes
+   :widths: 8 5 45
+
+   * - Name
+     - Type
+     - Description
+   * - ``mode``
+     - enum
+     - The mode of the device. The mode can be one of the following:
+
+       * ``legacy`` operates based on traditional MAC/VLAN steering
+         rules.
+       * ``switchdev`` allows for more advanced offloading capabilities of
+         the E-Switch to hardware.
+   * - ``inline-mode``
+     - enum
+     - Some HWs need the VF driver to put part of the packet
+       headers on the TX descriptor so the e-switch can do proper
+       matching and steering. Support for both switchdev mode and legacy mode.
+
+       * ``none`` none.
+       * ``link`` L2 mode.
+       * ``network`` L3 mode.
+       * ``transport`` L4 mode.
+   * - ``encap-mode``
+     - enum
+     - The encapsulation mode of the device. Support for both switchdev mode
+       and legacy mode. The mode can be one of the following:
+
+       * ``none`` Disable encapsulation support.
+       * ``basic`` Enable encapsulation support.
+
+Example Usage
+=============
+
+.. code:: shell
+
+    # enable switchdev mode
+    $ devlink dev eswitch set pci/0000:08:00.0 mode switchdev
+
+    # set inline-mode and encap-mode
+    $ devlink dev eswitch set pci/0000:08:00.0 inline-mode none encap-mode basic
+
+    # display devlink device eswitch attributes
+    $ devlink dev eswitch show pci/0000:08:00.0
+      pci/0000:08:00.0: mode switchdev inline-mode none encap-mode basic
+
+    # enable encap-mode with legacy mode
+    $ devlink dev eswitch set pci/0000:08:00.0 mode legacy inline-mode none encap-mode basic
index e14d7a701b72bc7bbb942ccc82956e40bf42a0aa..948c8c44e233f6cb736f5492d158832b4844f7d8 100644 (file)
@@ -67,6 +67,7 @@ general.
    devlink-selftests
    devlink-trap
    devlink-linecard
+   devlink-eswitch-attr
 
 Driver-specific documentation
 -----------------------------
index decb39c19b9ed23c2af7b1b8457285e250a15d56..5e23386f69687f101a4145dbeb869a91bbf5295b 100644 (file)
@@ -1,4 +1,5 @@
 .. SPDX-License-Identifier: GPL-2.0
+.. _representors:
 
 =============================
 Network Function Representors
index 995780088eb23142a169542fad4d298b4cbc2cfe..84335d119ff136a0236594b43ef5fc21126d69e3 100644 (file)
@@ -46,21 +46,16 @@ SEV hardware uses ASIDs to associate a memory encryption key with a VM.
 Hence, the ASID for the SEV-enabled guests must be from 1 to a maximum value
 defined in the CPUID 0x8000001f[ecx] field.
 
-SEV Key Management
-==================
+The KVM_MEMORY_ENCRYPT_OP ioctl
+===============================
 
-The SEV guest key management is handled by a separate processor called the AMD
-Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
-key management interface to perform common hypervisor activities such as
-encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
-information, see the SEV Key Management spec [api-spec]_
-
-The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP.  If the argument
-to KVM_MEMORY_ENCRYPT_OP is NULL, the ioctl returns 0 if SEV is enabled
-and ``ENOTTY`` if it is disabled (on some older versions of Linux,
-the ioctl runs normally even with a NULL argument, and therefore will
-likely return ``EFAULT``).  If non-NULL, the argument to KVM_MEMORY_ENCRYPT_OP
-must be a struct kvm_sev_cmd::
+The main ioctl to access SEV is KVM_MEMORY_ENCRYPT_OP, which operates on
+the VM file descriptor.  If the argument to KVM_MEMORY_ENCRYPT_OP is NULL,
+the ioctl returns 0 if SEV is enabled and ``ENOTTY`` if it is disabled
+(on some older versions of Linux, the ioctl tries to run normally even
+with a NULL argument, and therefore will likely return ``EFAULT`` instead
+of zero if SEV is enabled).  If non-NULL, the argument to
+KVM_MEMORY_ENCRYPT_OP must be a struct kvm_sev_cmd::
 
        struct kvm_sev_cmd {
                __u32 id;
@@ -87,10 +82,6 @@ guests, such as launching, running, snapshotting, migrating and decommissioning.
 The KVM_SEV_INIT command is used by the hypervisor to initialize the SEV platform
 context. In a typical workflow, this command should be the first command issued.
 
-The firmware can be initialized either by using its own non-volatile storage or
-the OS can manage the NV storage for the firmware using the module parameter
-``init_ex_path``. If the file specified by ``init_ex_path`` does not exist or
-is invalid, the OS will create or override the file with output from PSP.
 
 Returns: 0 on success, -negative on error
 
@@ -434,6 +425,21 @@ issued by the hypervisor to make the guest ready for execution.
 
 Returns: 0 on success, -negative on error
 
+Firmware Management
+===================
+
+The SEV guest key management is handled by a separate processor called the AMD
+Secure Processor (AMD-SP). Firmware running inside the AMD-SP provides a secure
+key management interface to perform common hypervisor activities such as
+encrypting bootstrap code, snapshot, migrating and debugging the guest. For more
+information, see the SEV Key Management spec [api-spec]_
+
+The AMD-SP firmware can be initialized either by using its own non-volatile
+storage or the OS can manage the NV storage for the firmware using
+parameter ``init_ex_path`` of the ``ccp`` module. If the file specified
+by ``init_ex_path`` does not exist or is invalid, the OS will create or
+override the file with PSP non-volatile storage.
+
 References
 ==========
 
index 9315fc385fb0bedb71fd2cf0d80aa8d32d23a9b5..3aecf2a70e7b43110f5ece45d6b992bcccff75b9 100644 (file)
@@ -193,8 +193,8 @@ data:
        Asynchronous page fault (APF) control MSR.
 
        Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area
-       which must be in guest RAM and must be zeroed. This memory is expected
-       to hold a copy of the following structure::
+       which must be in guest RAM. This memory is expected to hold the
+       following structure::
 
          struct kvm_vcpu_pv_apf_data {
                /* Used for 'page not present' events delivered via #PF */
@@ -204,7 +204,6 @@ data:
                __u32 token;
 
                __u8 pad[56];
-               __u32 enabled;
          };
 
        Bits 5-4 of the MSR are reserved and should be zero. Bit 0 is set to 1
@@ -232,14 +231,14 @@ data:
        as regular page fault, guest must reset 'flags' to '0' before it does
        something that can generate normal page fault.
 
-       Bytes 5-7 of 64 byte memory location ('token') will be written to by the
+       Bytes 4-7 of 64 byte memory location ('token') will be written to by the
        hypervisor at the time of APF 'page ready' event injection. The content
-       of these bytes is a token which was previously delivered as 'page not
-       present' event. The event indicates the page in now available. Guest is
-       supposed to write '0' to 'token' when it is done handling 'page ready'
-       event and to write 1' to MSR_KVM_ASYNC_PF_ACK after clearing the location;
-       writing to the MSR forces KVM to re-scan its queue and deliver the next
-       pending notification.
+       of these bytes is a token which was previously delivered in CR2 as
+       'page not present' event. The event indicates the page is now available.
+       Guest is supposed to write '0' to 'token' when it is done handling
+       'page ready' event and to write '1' to MSR_KVM_ASYNC_PF_ACK after
+       clearing the location; writing to the MSR forces KVM to re-scan its
+       queue and deliver the next pending notification.
 
        Note, MSR_KVM_ASYNC_PF_INT MSR specifying the interrupt vector for 'page
        ready' APF delivery needs to be written to before enabling APF mechanism
index 7c121493f43d01e610219a8509dd38e181580388..75381386fe4c38486ce7a68f69c46abd0692ad66 100644 (file)
@@ -14019,6 +14019,7 @@ F:      drivers/net/ethernet/mellanox/mlx4/en_*
 
 MELLANOX ETHERNET DRIVER (mlx5e)
 M:     Saeed Mahameed <saeedm@nvidia.com>
+M:     Tariq Toukan <tariqt@nvidia.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://www.mellanox.com
@@ -14086,6 +14087,7 @@ F:      include/uapi/rdma/mlx4-abi.h
 MELLANOX MLX5 core VPI driver
 M:     Saeed Mahameed <saeedm@nvidia.com>
 M:     Leon Romanovsky <leonro@nvidia.com>
+M:     Tariq Toukan <tariqt@nvidia.com>
 L:     netdev@vger.kernel.org
 L:     linux-rdma@vger.kernel.org
 S:     Supported
@@ -23679,7 +23681,6 @@ F:      drivers/scsi/vmw_pvscsi.c
 F:     drivers/scsi/vmw_pvscsi.h
 
 VMWARE VIRTUAL PTP CLOCK DRIVER
-M:     Jeff Sipek <jsipek@vmware.com>
 R:     Ajay Kaher <akaher@vmware.com>
 R:     Alexey Makhalov <amakhalov@vmware.com>
 R:     VMware PV-Drivers Reviewers <pv-drivers@vmware.com>
index f3a6da8b28901907d0fbc533b2d63c498ecb5afc..5260c63db0078ba6689b1cf3e016134810aa995a 100644 (file)
@@ -944,6 +944,8 @@ ap_spi_fp: &spi10 {
                vddrf-supply = <&pp1300_l2c>;
                vddch0-supply = <&pp3300_l10c>;
                max-speed = <3200000>;
+
+               qcom,local-bd-address-broken;
        };
 };
 
index ce08b744aaab22fb2a5886b5c08005261ca7d5b0..06234c3a15f3dbe7a5ee874c41b05bb72c2894d4 100644 (file)
@@ -291,6 +291,21 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
        blr     x2
 0:
        mov_q   x0, HCR_HOST_NVHE_FLAGS
+
+       /*
+        * Compliant CPUs advertise their VHE-onlyness with
+        * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
+        * RES1 in that case. Publish the E2H bit early so that
+        * it can be picked up by the init_el2_state macro.
+        *
+        * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but
+        * don't advertise it (they predate this relaxation).
+        */
+       mrs_s   x1, SYS_ID_AA64MMFR4_EL1
+       tbz     x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
+
+       orr     x0, x0, #HCR_E2H
+1:
        msr     hcr_el2, x0
        isb
 
@@ -303,22 +318,10 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
 
        mov_q   x1, INIT_SCTLR_EL1_MMU_OFF
 
-       /*
-        * Compliant CPUs advertise their VHE-onlyness with
-        * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be
-        * RES1 in that case.
-        *
-        * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but
-        * don't advertise it (they predate this relaxation).
-        */
-       mrs_s   x0, SYS_ID_AA64MMFR4_EL1
-       ubfx    x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH
-       tbnz    x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f
-
        mrs     x0, hcr_el2
        and     x0, x0, #HCR_E2H
        cbz     x0, 2f
-1:
+
        /* Set a sane SCTLR_EL1, the VHE way */
        pre_disable_mmu_workaround
        msr_s   SYS_SCTLR_EL12, x1
index 3dee5490eea94dd08e4ff88cb79f41d5d60be139..c4a0a35e02c72866b87c8a3e827b87ed5303504e 100644 (file)
@@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void)
        if (err)
                goto out_hyp;
 
-       if (is_protected_kvm_enabled()) {
-               kvm_info("Protected nVHE mode initialized successfully\n");
-       } else if (in_hyp_mode) {
-               kvm_info("VHE mode initialized successfully\n");
-       } else {
-               char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n';
-               kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode);
-       }
+       kvm_info("%s%sVHE mode initialized successfully\n",
+                in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
+                                    "Protected " : "Hyp "),
+                in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
+                                    "h" : "n"));
 
        /*
         * FIXME: Do something reasonable if kvm_init() fails after pKVM
index a60fb13e21924f4af56162687231251d8655ba04..2fc68da4036d901f8646753aa2935a6278235d0f 100644 (file)
@@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
        /* Switch to requested VMID */
        __tlb_switch_to_guest(mmu, &cxt, false);
 
-       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+                               TLBI_TTL_UNKNOWN);
 
        dsb(ish);
        __tlbi(vmalle1is);
index 3fae5830f8d2c72f4ed4032cfd99fd285cbcb885..5a59ef88b646f054b4c1f4a3994803e817efd94a 100644 (file)
@@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
                kvm_clear_pte(ctx->ptep);
                dsb(ishst);
-               __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
+               __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN);
        } else {
                if (ctx->end - ctx->addr < granule)
                        return -EINVAL;
@@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
                 * Perform the appropriate TLB invalidation based on the
                 * evicted pte value (if any).
                 */
-               if (kvm_pte_table(ctx->old, ctx->level))
-                       kvm_tlb_flush_vmid_range(mmu, ctx->addr,
-                                               kvm_granule_size(ctx->level));
-               else if (kvm_pte_valid(ctx->old))
+               if (kvm_pte_table(ctx->old, ctx->level)) {
+                       u64 size = kvm_granule_size(ctx->level);
+                       u64 addr = ALIGN_DOWN(ctx->addr, size);
+
+                       kvm_tlb_flush_vmid_range(mmu, addr, size);
+               } else if (kvm_pte_valid(ctx->old)) {
                        kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
                                     ctx->addr, ctx->level);
+               }
        }
 
        if (stage2_pte_is_counted(ctx->old))
@@ -896,9 +899,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
        if (kvm_pte_valid(ctx->old)) {
                kvm_clear_pte(ctx->ptep);
 
-               if (!stage2_unmap_defer_tlb_flush(pgt))
-                       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
-                                       ctx->addr, ctx->level);
+               if (kvm_pte_table(ctx->old, ctx->level)) {
+                       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+                                    TLBI_TTL_UNKNOWN);
+               } else if (!stage2_unmap_defer_tlb_flush(pgt)) {
+                       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr,
+                                    ctx->level);
+               }
        }
 
        mm_ops->put_page(ctx->ptep);
index b32e2940df7dc83418fe39c4095998033326262e..1a60b95381e8e90af73083e0f46ab3211eea96ee 100644 (file)
@@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
        /* Switch to requested VMID */
        __tlb_switch_to_guest(mmu, &cxt);
 
-       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+       __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
+                               TLBI_TTL_UNKNOWN);
 
        dsb(ish);
        __tlbi(vmalle1is);
index 18680771cdb0ea4c9ee2fcea29d1219189fda752..dc04bc7678659a0d6d5a69636bca48673a83047e 100644 (file)
@@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
        fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
        is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 
-       if (esr_fsc_is_permission_fault(esr)) {
+       if (esr_fsc_is_translation_fault(esr)) {
                /* Beyond sanitised PARange (which is the IPA limit) */
                if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
                        kvm_inject_size_fault(vcpu);
index 39e72aa016a4cc2d96a4994a342762762f2189c2..b467ba5ed9100024d0cbdd876917379ac343fc7a 100644 (file)
@@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending)
        raw_spin_lock_irqsave(&irqd->lock, flags);
 
        sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK;
-       if (!pending &&
-           ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) ||
-            (sm == APLIC_SOURCECFG_SM_LEVEL_LOW)))
+       if (sm == APLIC_SOURCECFG_SM_INACTIVE)
                goto skip_write_pending;
 
+       if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH ||
+           sm == APLIC_SOURCECFG_SM_LEVEL_LOW) {
+               if (!pending)
+                       goto skip_write_pending;
+               if ((irqd->state & APLIC_IRQ_STATE_INPUT) &&
+                   sm == APLIC_SOURCECFG_SM_LEVEL_LOW)
+                       goto skip_write_pending;
+               if (!(irqd->state & APLIC_IRQ_STATE_INPUT) &&
+                   sm == APLIC_SOURCECFG_SM_LEVEL_HIGH)
+                       goto skip_write_pending;
+       }
+
        if (pending)
                irqd->state |= APLIC_IRQ_STATE_PENDING;
        else
@@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled)
 
 static bool aplic_read_input(struct aplic *aplic, u32 irq)
 {
-       bool ret;
-       unsigned long flags;
+       u32 sourcecfg, sm, raw_input, irq_inverted;
        struct aplic_irq *irqd;
+       unsigned long flags;
+       bool ret = false;
 
        if (!irq || aplic->nr_irqs <= irq)
                return false;
        irqd = &aplic->irqs[irq];
 
        raw_spin_lock_irqsave(&irqd->lock, flags);
-       ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false;
+
+       sourcecfg = irqd->sourcecfg;
+       if (sourcecfg & APLIC_SOURCECFG_D)
+               goto skip;
+
+       sm = sourcecfg & APLIC_SOURCECFG_SM_MASK;
+       if (sm == APLIC_SOURCECFG_SM_INACTIVE)
+               goto skip;
+
+       raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0;
+       irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW ||
+                       sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0;
+       ret = !!(raw_input ^ irq_inverted);
+
+skip:
        raw_spin_unlock_irqrestore(&irqd->lock, flags);
 
        return ret;
index f4a6124d25c939ecdf5dc631d8c7aa69a2684621..994adc26db4b10d81557535a70aaddbb748217f4 100644 (file)
@@ -986,7 +986,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
 
 static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
 {
-       return copy_isa_ext_reg_indices(vcpu, NULL);;
+       return copy_isa_ext_reg_indices(vcpu, NULL);
 }
 
 static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
index fcd20c6dc7f90c6b462463969b63debf4ec02759..67b68d0d17d1ec72a93df469a11708099703eac4 100644 (file)
@@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void);
 extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
                                          struct module *mod);
 extern void *callthunks_translate_call_dest(void *dest);
-extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip);
 #else
 static __always_inline void callthunks_patch_builtin_calls(void) {}
 static __always_inline void
@@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
        return dest;
 }
 static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
-                                                         void *func)
+                                                         void *func, void *ip)
 {
        return 0;
 }
index 42157ddcc09d436fef9684a813b3e9b9e666e9da..686e92d2663eeeacd90a46568ae37b3db76b9e00 100644 (file)
@@ -33,6 +33,8 @@ enum cpuid_leafs
        CPUID_7_EDX,
        CPUID_8000_001F_EAX,
        CPUID_8000_0021_EAX,
+       CPUID_LNX_5,
+       NR_CPUID_WORDS,
 };
 
 #define X86_CAP_FMT_NUM "%d:%d"
index ad29984d5e398da425c0516f14b5cf538a023696..ef11aa4cab42536cf1773bf5b787bcf715bd76e9 100644 (file)
@@ -694,6 +694,7 @@ enum sev_cmd_id {
 
 struct kvm_sev_cmd {
        __u32 id;
+       __u32 pad0;
        __u64 data;
        __u32 error;
        __u32 sev_fd;
@@ -704,28 +705,35 @@ struct kvm_sev_launch_start {
        __u32 policy;
        __u64 dh_uaddr;
        __u32 dh_len;
+       __u32 pad0;
        __u64 session_uaddr;
        __u32 session_len;
+       __u32 pad1;
 };
 
 struct kvm_sev_launch_update_data {
        __u64 uaddr;
        __u32 len;
+       __u32 pad0;
 };
 
 
 struct kvm_sev_launch_secret {
        __u64 hdr_uaddr;
        __u32 hdr_len;
+       __u32 pad0;
        __u64 guest_uaddr;
        __u32 guest_len;
+       __u32 pad1;
        __u64 trans_uaddr;
        __u32 trans_len;
+       __u32 pad2;
 };
 
 struct kvm_sev_launch_measure {
        __u64 uaddr;
        __u32 len;
+       __u32 pad0;
 };
 
 struct kvm_sev_guest_status {
@@ -738,33 +746,43 @@ struct kvm_sev_dbg {
        __u64 src_uaddr;
        __u64 dst_uaddr;
        __u32 len;
+       __u32 pad0;
 };
 
 struct kvm_sev_attestation_report {
        __u8 mnonce[16];
        __u64 uaddr;
        __u32 len;
+       __u32 pad0;
 };
 
 struct kvm_sev_send_start {
        __u32 policy;
+       __u32 pad0;
        __u64 pdh_cert_uaddr;
        __u32 pdh_cert_len;
+       __u32 pad1;
        __u64 plat_certs_uaddr;
        __u32 plat_certs_len;
+       __u32 pad2;
        __u64 amd_certs_uaddr;
        __u32 amd_certs_len;
+       __u32 pad3;
        __u64 session_uaddr;
        __u32 session_len;
+       __u32 pad4;
 };
 
 struct kvm_sev_send_update_data {
        __u64 hdr_uaddr;
        __u32 hdr_len;
+       __u32 pad0;
        __u64 guest_uaddr;
        __u32 guest_len;
+       __u32 pad1;
        __u64 trans_uaddr;
        __u32 trans_len;
+       __u32 pad2;
 };
 
 struct kvm_sev_receive_start {
@@ -772,17 +790,22 @@ struct kvm_sev_receive_start {
        __u32 policy;
        __u64 pdh_uaddr;
        __u32 pdh_len;
+       __u32 pad0;
        __u64 session_uaddr;
        __u32 session_len;
+       __u32 pad1;
 };
 
 struct kvm_sev_receive_update_data {
        __u64 hdr_uaddr;
        __u32 hdr_len;
+       __u32 pad0;
        __u64 guest_uaddr;
        __u32 guest_len;
+       __u32 pad1;
        __u64 trans_uaddr;
        __u32 trans_len;
+       __u32 pad2;
 };
 
 #define KVM_X2APIC_API_USE_32BIT_IDS            (1ULL << 0)
index 6bc3456a8ebf1d1a7c83498cbbef2b5bae106b41..a1efa7907a0b100840f6c13c33e5abaa36ef822b 100644 (file)
@@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data {
        __u32 token;
 
        __u8 pad[56];
-       __u32 enabled;
 };
 
 #define KVM_PV_EOI_BIT 0
index 30335182b6b0ae6a4e474c7e93de93e1bd24b2f4..e92ff0c11db8145bd8c06f31e9c5a12c9c3e53d6 100644 (file)
@@ -314,7 +314,7 @@ static bool is_callthunk(void *addr)
        return !bcmp(pad, insn_buff, tmpl_size);
 }
 
-int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip)
 {
        unsigned int tmpl_size = SKL_TMPL_SIZE;
        u8 insn_buff[MAX_PATCH_LEN];
@@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
                return 0;
 
        memcpy(insn_buff, skl_call_thunk_template, tmpl_size);
-       apply_relocation(insn_buff, tmpl_size, *pprog,
+       apply_relocation(insn_buff, tmpl_size, ip,
                         skl_call_thunk_template, tmpl_size);
 
        memcpy(*pprog, insn_buff, tmpl_size);
index 4cadfd606e8e6a9f16a10eec4fdf85ac9b67f575..7f0732bc0ccd2346d7737bb04ea796c730986143 100644 (file)
@@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg)
 
 early_param("no-steal-acc", parse_no_stealacc);
 
+static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled);
 static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
 DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
 static int has_steal_clock = 0;
@@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void)
 {
        u32 flags = 0;
 
-       if (__this_cpu_read(apf_reason.enabled)) {
+       if (__this_cpu_read(async_pf_enabled)) {
                flags = __this_cpu_read(apf_reason.flags);
                __this_cpu_write(apf_reason.flags, 0);
        }
@@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
 
        inc_irq_stat(irq_hv_callback_count);
 
-       if (__this_cpu_read(apf_reason.enabled)) {
+       if (__this_cpu_read(async_pf_enabled)) {
                token = __this_cpu_read(apf_reason.token);
                kvm_async_pf_task_wake(token);
                __this_cpu_write(apf_reason.token, 0);
@@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void)
                wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
 
                wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
-               __this_cpu_write(apf_reason.enabled, 1);
+               __this_cpu_write(async_pf_enabled, true);
                pr_debug("setup async PF for cpu %d\n", smp_processor_id());
        }
 
@@ -383,11 +384,11 @@ static void kvm_guest_cpu_init(void)
 
 static void kvm_pv_disable_apf(void)
 {
-       if (!__this_cpu_read(apf_reason.enabled))
+       if (!__this_cpu_read(async_pf_enabled))
                return;
 
        wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
-       __this_cpu_write(apf_reason.enabled, 0);
+       __this_cpu_write(async_pf_enabled, false);
 
        pr_debug("disable async PF for cpu %d\n", smp_processor_id());
 }
index adba49afb5fe63b1de9345579615284593e00468..bfc0bfcb2bc60dd2860fa4617f3dc92dea1f3a97 100644 (file)
@@ -189,15 +189,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2
        return 0;
 }
 
-static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
-                                                           const char *sig)
+static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries,
+                                                             int nent, const char *sig)
 {
        struct kvm_hypervisor_cpuid cpuid = {};
        struct kvm_cpuid_entry2 *entry;
        u32 base;
 
        for_each_possible_hypervisor_cpuid_base(base) {
-               entry = kvm_find_cpuid_entry(vcpu, base);
+               entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
 
                if (entry) {
                        u32 signature[3];
@@ -217,22 +217,29 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp
        return cpuid;
 }
 
-static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu,
-                                             struct kvm_cpuid_entry2 *entries, int nent)
+static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu,
+                                                           const char *sig)
 {
-       u32 base = vcpu->arch.kvm_cpuid.base;
-
-       if (!base)
-               return NULL;
+       return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries,
+                                         vcpu->arch.cpuid_nent, sig);
+}
 
-       return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES,
+static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries,
+                                                             int nent, u32 kvm_cpuid_base)
+{
+       return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES,
                                 KVM_CPUID_INDEX_NOT_SIGNIFICANT);
 }
 
 static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
 {
-       return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries,
-                                            vcpu->arch.cpuid_nent);
+       u32 base = vcpu->arch.kvm_cpuid.base;
+
+       if (!base)
+               return NULL;
+
+       return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries,
+                                            vcpu->arch.cpuid_nent, base);
 }
 
 void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
@@ -266,6 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
                                       int nent)
 {
        struct kvm_cpuid_entry2 *best;
+       struct kvm_hypervisor_cpuid kvm_cpuid;
 
        best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
        if (best) {
@@ -292,10 +300,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
                     cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
                best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-       best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
-       if (kvm_hlt_in_guest(vcpu->kvm) && best &&
-               (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
-               best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+       kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE);
+       if (kvm_cpuid.base) {
+               best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base);
+               if (kvm_hlt_in_guest(vcpu->kvm) && best)
+                       best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
+       }
 
        if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
                best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
index aadefcaa9561d0a31e589784da7e871e4a0de2e0..58ac8d69c94bd124001861a1b2e06de8f3fd41b8 100644 (file)
@@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = {
  */
 static __always_inline void reverse_cpuid_check(unsigned int x86_leaf)
 {
+       BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS);
        BUILD_BUG_ON(x86_leaf == CPUID_LNX_1);
        BUILD_BUG_ON(x86_leaf == CPUID_LNX_2);
        BUILD_BUG_ON(x86_leaf == CPUID_LNX_3);
        BUILD_BUG_ON(x86_leaf == CPUID_LNX_4);
+       BUILD_BUG_ON(x86_leaf == CPUID_LNX_5);
        BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid));
        BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0);
 }
index ae0ac12382b9278732fc89d4ef00a5f84866f8c0..e5a4d9b0e79fd23e2dfc8224aa7a89d3b243c103 100644 (file)
@@ -84,9 +84,10 @@ struct enc_region {
 };
 
 /* Called with the sev_bitmap_lock held, or on shutdown  */
-static int sev_flush_asids(int min_asid, int max_asid)
+static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid)
 {
-       int ret, asid, error = 0;
+       int ret, error = 0;
+       unsigned int asid;
 
        /* Check if there are any ASIDs to reclaim before performing a flush */
        asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
@@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm)
 }
 
 /* Must be called with the sev_bitmap_lock held */
-static bool __sev_recycle_asids(int min_asid, int max_asid)
+static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid)
 {
        if (sev_flush_asids(min_asid, max_asid))
                return false;
@@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
 
 static int sev_asid_new(struct kvm_sev_info *sev)
 {
-       int asid, min_asid, max_asid, ret;
+       /*
+        * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
+        * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
+        * Note: min ASID can end up larger than the max if basic SEV support is
+        * effectively disabled by disallowing use of ASIDs for SEV guests.
+        */
+       unsigned int min_asid = sev->es_active ? 1 : min_sev_asid;
+       unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
+       unsigned int asid;
        bool retry = true;
+       int ret;
+
+       if (min_asid > max_asid)
+               return -ENOTTY;
 
        WARN_ON(sev->misc_cg);
        sev->misc_cg = get_current_misc_cg();
@@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev)
 
        mutex_lock(&sev_bitmap_lock);
 
-       /*
-        * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
-        * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
-        */
-       min_asid = sev->es_active ? 1 : min_sev_asid;
-       max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
 again:
        asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
        if (asid > max_asid) {
@@ -179,7 +186,8 @@ again:
 
        mutex_unlock(&sev_bitmap_lock);
 
-       return asid;
+       sev->asid = asid;
+       return 0;
 e_uncharge:
        sev_misc_cg_uncharge(sev);
        put_misc_cg(sev->misc_cg);
@@ -187,7 +195,7 @@ e_uncharge:
        return ret;
 }
 
-static int sev_get_asid(struct kvm *kvm)
+static unsigned int sev_get_asid(struct kvm *kvm)
 {
        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
 
@@ -247,21 +255,19 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
 {
        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
        struct sev_platform_init_args init_args = {0};
-       int asid, ret;
+       int ret;
 
        if (kvm->created_vcpus)
                return -EINVAL;
 
-       ret = -EBUSY;
        if (unlikely(sev->active))
-               return ret;
+               return -EINVAL;
 
        sev->active = true;
        sev->es_active = argp->id == KVM_SEV_ES_INIT;
-       asid = sev_asid_new(sev);
-       if (asid < 0)
+       ret = sev_asid_new(sev);
+       if (ret)
                goto e_no_asid;
-       sev->asid = asid;
 
        init_args.probe = false;
        ret = sev_platform_init(&init_args);
@@ -287,8 +293,8 @@ e_no_asid:
 
 static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
 {
+       unsigned int asid = sev_get_asid(kvm);
        struct sev_data_activate activate;
-       int asid = sev_get_asid(kvm);
        int ret;
 
        /* activate ASID on the given handle */
@@ -2240,8 +2246,10 @@ void __init sev_hardware_setup(void)
                goto out;
        }
 
-       sev_asid_count = max_sev_asid - min_sev_asid + 1;
-       WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+       if (min_sev_asid <= max_sev_asid) {
+               sev_asid_count = max_sev_asid - min_sev_asid + 1;
+               WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count));
+       }
        sev_supported = true;
 
        /* SEV-ES support requested? */
@@ -2272,7 +2280,9 @@ void __init sev_hardware_setup(void)
 out:
        if (boot_cpu_has(X86_FEATURE_SEV))
                pr_info("SEV %s (ASIDs %u - %u)\n",
-                       sev_supported ? "enabled" : "disabled",
+                       sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" :
+                                                                      "unusable" :
+                                                                      "disabled",
                        min_sev_asid, max_sev_asid);
        if (boot_cpu_has(X86_FEATURE_SEV_ES))
                pr_info("SEV-ES %s (ASIDs %u - %u)\n",
@@ -2320,7 +2330,7 @@ int sev_cpu_init(struct svm_cpu_data *sd)
  */
 static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va)
 {
-       int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid;
+       unsigned int asid = sev_get_asid(vcpu->kvm);
 
        /*
         * Note!  The address must be a kernel address, as regular page walk
@@ -2638,7 +2648,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
 {
        struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
-       int asid = sev_get_asid(svm->vcpu.kvm);
+       unsigned int asid = sev_get_asid(svm->vcpu.kvm);
 
        /* Assign the asid allocated with this SEV guest */
        svm->asid = asid;
index 88659de4d2a7141a6eff8adb28054c8cb0a6c3c2..c6b4b1728006d5021f958516c15b653d7759f4d1 100644 (file)
@@ -735,13 +735,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit,
  * Tracepoint for nested #vmexit because of interrupt pending
  */
 TRACE_EVENT(kvm_invlpga,
-           TP_PROTO(__u64 rip, int asid, u64 address),
+           TP_PROTO(__u64 rip, unsigned int asid, u64 address),
            TP_ARGS(rip, asid, address),
 
        TP_STRUCT__entry(
-               __field(        __u64,  rip     )
-               __field(        int,    asid    )
-               __field(        __u64,  address )
+               __field(        __u64,          rip     )
+               __field(        unsigned int,   asid    )
+               __field(        __u64,          address )
        ),
 
        TP_fast_assign(
@@ -750,7 +750,7 @@ TRACE_EVENT(kvm_invlpga,
                __entry->address        =       address;
        ),
 
-       TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx",
+       TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx",
                  __entry->rip, __entry->asid, __entry->address)
 );
 
index 02cde194a99e691c54b6252a31b3d9ca7f8e7e42..0795b3464058b0e515cb71d7bf84fa9908e6fc07 100644 (file)
@@ -228,8 +228,11 @@ SYM_CODE_END(srso_return_thunk)
 #else /* !CONFIG_MITIGATION_SRSO */
 /* Dummy for the alternative in CALL_UNTRAIN_RET. */
 SYM_CODE_START(srso_alias_untrain_ret)
-       RET
+       ANNOTATE_UNRET_SAFE
+       ret
+       int3
 SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
 #define JMP_SRSO_UNTRAIN_RET "ud2"
 #endif /* CONFIG_MITIGATION_SRSO */
 
index a7ba8e1786452db2442eaeeee8d3996c19f3e3b6..df5fac428408fe65ecc03766def03e0959bc539a 100644 (file)
@@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip)
 static int emit_rsb_call(u8 **pprog, void *func, void *ip)
 {
        OPTIMIZER_HIDE_VAR(func);
-       x86_call_depth_emit_accounting(pprog, func);
+       ip += x86_call_depth_emit_accounting(pprog, func, ip);
        return emit_patch(pprog, func, ip, 0xE8);
 }
 
@@ -1972,20 +1972,17 @@ populate_extable:
 
                        /* call */
                case BPF_JMP | BPF_CALL: {
-                       int offs;
+                       u8 *ip = image + addrs[i - 1];
 
                        func = (u8 *) __bpf_call_base + imm32;
                        if (tail_call_reachable) {
                                RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
-                               if (!imm32)
-                                       return -EINVAL;
-                               offs = 7 + x86_call_depth_emit_accounting(&prog, func);
-                       } else {
-                               if (!imm32)
-                                       return -EINVAL;
-                               offs = x86_call_depth_emit_accounting(&prog, func);
+                               ip += 7;
                        }
-                       if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+                       if (!imm32)
+                               return -EINVAL;
+                       ip += x86_call_depth_emit_accounting(&prog, func, ip);
+                       if (emit_call(&prog, func, ip))
                                return -EINVAL;
                        break;
                }
@@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
                 * Direct-call fentry stub, as such it needs accounting for the
                 * __fentry__ call.
                 */
-               x86_call_depth_emit_accounting(&prog, NULL);
+               x86_call_depth_emit_accounting(&prog, NULL, image);
        }
        EMIT1(0x55);             /* push rbp */
        EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
index 7a5f611c3d2e3e83eb00be12b9131f49d8348f5e..b8e32d933a6369aebbffe36e2ce3165cc2288bef 100644 (file)
@@ -583,9 +583,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder,
        mutex_unlock(&bdev->bd_holder_lock);
        bd_clear_claiming(whole, holder);
        mutex_unlock(&bdev_lock);
-
-       if (hops && hops->get_holder)
-               hops->get_holder(holder);
 }
 
 /**
@@ -608,7 +605,6 @@ EXPORT_SYMBOL(bd_abort_claiming);
 static void bd_end_claim(struct block_device *bdev, void *holder)
 {
        struct block_device *whole = bdev_whole(bdev);
-       const struct blk_holder_ops *hops = bdev->bd_holder_ops;
        bool unblock = false;
 
        /*
@@ -631,9 +627,6 @@ static void bd_end_claim(struct block_device *bdev, void *holder)
                whole->bd_holder = NULL;
        mutex_unlock(&bdev_lock);
 
-       if (hops && hops->put_holder)
-               hops->put_holder(holder);
-
        /*
         * If this was the last claim, remove holder link and unblock evpoll if
         * it was a write holder.
@@ -776,17 +769,17 @@ void blkdev_put_no_open(struct block_device *bdev)
 
 static bool bdev_writes_blocked(struct block_device *bdev)
 {
-       return bdev->bd_writers == -1;
+       return bdev->bd_writers < 0;
 }
 
 static void bdev_block_writes(struct block_device *bdev)
 {
-       bdev->bd_writers = -1;
+       bdev->bd_writers--;
 }
 
 static void bdev_unblock_writes(struct block_device *bdev)
 {
-       bdev->bd_writers = 0;
+       bdev->bd_writers++;
 }
 
 static bool bdev_may_open(struct block_device *bdev, blk_mode_t mode)
@@ -813,6 +806,11 @@ static void bdev_claim_write_access(struct block_device *bdev, blk_mode_t mode)
                bdev->bd_writers++;
 }
 
+static inline bool bdev_unclaimed(const struct file *bdev_file)
+{
+       return bdev_file->private_data == BDEV_I(bdev_file->f_mapping->host);
+}
+
 static void bdev_yield_write_access(struct file *bdev_file)
 {
        struct block_device *bdev;
@@ -820,14 +818,15 @@ static void bdev_yield_write_access(struct file *bdev_file)
        if (bdev_allow_write_mounted)
                return;
 
+       if (bdev_unclaimed(bdev_file))
+               return;
+
        bdev = file_bdev(bdev_file);
-       /* Yield exclusive or shared write access. */
-       if (bdev_file->f_mode & FMODE_WRITE) {
-               if (bdev_writes_blocked(bdev))
-                       bdev_unblock_writes(bdev);
-               else
-                       bdev->bd_writers--;
-       }
+
+       if (bdev_file->f_mode & FMODE_WRITE_RESTRICTED)
+               bdev_unblock_writes(bdev);
+       else if (bdev_file->f_mode & FMODE_WRITE)
+               bdev->bd_writers--;
 }
 
 /**
@@ -907,6 +906,8 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
        bdev_file->f_mode |= FMODE_BUF_RASYNC | FMODE_CAN_ODIRECT;
        if (bdev_nowait(bdev))
                bdev_file->f_mode |= FMODE_NOWAIT;
+       if (mode & BLK_OPEN_RESTRICT_WRITES)
+               bdev_file->f_mode |= FMODE_WRITE_RESTRICTED;
        bdev_file->f_mapping = bdev->bd_inode->i_mapping;
        bdev_file->f_wb_err = filemap_sample_wb_err(bdev_file->f_mapping);
        bdev_file->private_data = holder;
@@ -1012,6 +1013,20 @@ struct file *bdev_file_open_by_path(const char *path, blk_mode_t mode,
 }
 EXPORT_SYMBOL(bdev_file_open_by_path);
 
+static inline void bd_yield_claim(struct file *bdev_file)
+{
+       struct block_device *bdev = file_bdev(bdev_file);
+       void *holder = bdev_file->private_data;
+
+       lockdep_assert_held(&bdev->bd_disk->open_mutex);
+
+       if (WARN_ON_ONCE(IS_ERR_OR_NULL(holder)))
+               return;
+
+       if (!bdev_unclaimed(bdev_file))
+               bd_end_claim(bdev, holder);
+}
+
 void bdev_release(struct file *bdev_file)
 {
        struct block_device *bdev = file_bdev(bdev_file);
@@ -1036,7 +1051,7 @@ void bdev_release(struct file *bdev_file)
        bdev_yield_write_access(bdev_file);
 
        if (holder)
-               bd_end_claim(bdev, holder);
+               bd_yield_claim(bdev_file);
 
        /*
         * Trigger event checking and tell drivers to flush MEDIA_CHANGE
@@ -1056,6 +1071,39 @@ put_no_open:
        blkdev_put_no_open(bdev);
 }
 
+/**
+ * bdev_fput - yield claim to the block device and put the file
+ * @bdev_file: open block device
+ *
+ * Yield claim on the block device and put the file. Ensure that the
+ * block device can be reclaimed before the file is closed which is a
+ * deferred operation.
+ */
+void bdev_fput(struct file *bdev_file)
+{
+       if (WARN_ON_ONCE(bdev_file->f_op != &def_blk_fops))
+               return;
+
+       if (bdev_file->private_data) {
+               struct block_device *bdev = file_bdev(bdev_file);
+               struct gendisk *disk = bdev->bd_disk;
+
+               mutex_lock(&disk->open_mutex);
+               bdev_yield_write_access(bdev_file);
+               bd_yield_claim(bdev_file);
+               /*
+                * Tell release we already gave up our hold on the
+                * device and if write restrictions are available that
+                * we already gave up write access to the device.
+                */
+               bdev_file->private_data = BDEV_I(bdev_file->f_mapping->host);
+               mutex_unlock(&disk->open_mutex);
+       }
+
+       fput(bdev_file);
+}
+EXPORT_SYMBOL(bdev_fput);
+
 /**
  * lookup_bdev() - Look up a struct block_device by name.
  * @pathname: Name of the block device in the filesystem.
index 302dce0b2b5044e20489f4b34bb8f4fde189e597..d67881b50bca28a1e08bb494b00c2bf0ee44957b 100644 (file)
@@ -662,14 +662,15 @@ static int acpi_thermal_register_thermal_zone(struct acpi_thermal *tz,
 {
        int result;
 
-       tz->thermal_zone = thermal_zone_device_register_with_trips("acpitz",
-                                                                  trip_table,
-                                                                  trip_count,
-                                                                  tz,
-                                                                  &acpi_thermal_zone_ops,
-                                                                  NULL,
-                                                                  passive_delay,
-                                                                  tz->polling_frequency * 100);
+       if (trip_count)
+               tz->thermal_zone = thermal_zone_device_register_with_trips(
+                                       "acpitz", trip_table, trip_count, tz,
+                                       &acpi_thermal_zone_ops, NULL, passive_delay,
+                                       tz->polling_frequency * 100);
+       else
+               tz->thermal_zone = thermal_tripless_zone_device_register(
+                                       "acpitz", tz, &acpi_thermal_zone_ops, NULL);
+
        if (IS_ERR(tz->thermal_zone))
                return PTR_ERR(tz->thermal_zone);
 
@@ -901,11 +902,8 @@ static int acpi_thermal_add(struct acpi_device *device)
                trip++;
        }
 
-       if (trip == trip_table) {
+       if (trip == trip_table)
                pr_warn(FW_BUG "No valid trip points!\n");
-               result = -ENODEV;
-               goto free_memory;
-       }
 
        result = acpi_thermal_register_thermal_zone(tz, trip_table,
                                                    trip - trip_table,
index d4a626f87963ba123a4f07a366c28681db4714fa..79a8b0aa37bf37fa8eb44e2dcba7181dfb2222b0 100644 (file)
@@ -30,7 +30,6 @@
 #define ST_AHCI_OOBR_CIMAX_SHIFT       0
 
 struct st_ahci_drv_data {
-       struct platform_device *ahci;
        struct reset_control *pwr;
        struct reset_control *sw_rst;
        struct reset_control *pwr_rst;
index 4ac854f6b05777c669d7de39ab006d963b74bd48..88b2e9817f49dfd200a0f58835a9344cab1e2818 100644 (file)
@@ -1371,9 +1371,6 @@ static struct pci_driver pata_macio_pci_driver = {
        .suspend        = pata_macio_pci_suspend,
        .resume         = pata_macio_pci_resume,
 #endif
-       .driver = {
-               .owner          = THIS_MODULE,
-       },
 };
 MODULE_DEVICE_TABLE(pci, pata_macio_pci_match);
 
index 400b22ee99c33affba7b25ae46de0a2014bfd71f..4c270999ba3ccd9dd70175b02886998cc47e99a9 100644 (file)
@@ -200,7 +200,10 @@ int gemini_sata_start_bridge(struct sata_gemini *sg, unsigned int bridge)
                pclk = sg->sata0_pclk;
        else
                pclk = sg->sata1_pclk;
-       clk_enable(pclk);
+       ret = clk_enable(pclk);
+       if (ret)
+               return ret;
+
        msleep(10);
 
        /* Do not keep clocking a bridge that is not online */
index e82786c63fbd73decc4af68d1a3aff1113411a27..9bec0aee92e04c412fec0abe4ac30173950890fb 100644 (file)
@@ -787,37 +787,6 @@ static const struct ata_port_info mv_port_info[] = {
        },
 };
 
-static const struct pci_device_id mv_pci_tbl[] = {
-       { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
-       { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
-       { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
-       { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
-       /* RocketRAID 1720/174x have different identifiers */
-       { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
-       { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
-       { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
-
-       { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
-       { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
-       { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
-       { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
-       { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
-
-       { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
-
-       /* Adaptec 1430SA */
-       { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
-
-       /* Marvell 7042 support */
-       { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
-
-       /* Highpoint RocketRAID PCIe series */
-       { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
-       { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
-
-       { }                     /* terminate list */
-};
-
 static const struct mv_hw_ops mv5xxx_ops = {
        .phy_errata             = mv5_phy_errata,
        .enable_leds            = mv5_enable_leds,
@@ -4303,6 +4272,36 @@ static int mv_pci_init_one(struct pci_dev *pdev,
 static int mv_pci_device_resume(struct pci_dev *pdev);
 #endif
 
+static const struct pci_device_id mv_pci_tbl[] = {
+       { PCI_VDEVICE(MARVELL, 0x5040), chip_504x },
+       { PCI_VDEVICE(MARVELL, 0x5041), chip_504x },
+       { PCI_VDEVICE(MARVELL, 0x5080), chip_5080 },
+       { PCI_VDEVICE(MARVELL, 0x5081), chip_508x },
+       /* RocketRAID 1720/174x have different identifiers */
+       { PCI_VDEVICE(TTI, 0x1720), chip_6042 },
+       { PCI_VDEVICE(TTI, 0x1740), chip_6042 },
+       { PCI_VDEVICE(TTI, 0x1742), chip_6042 },
+
+       { PCI_VDEVICE(MARVELL, 0x6040), chip_604x },
+       { PCI_VDEVICE(MARVELL, 0x6041), chip_604x },
+       { PCI_VDEVICE(MARVELL, 0x6042), chip_6042 },
+       { PCI_VDEVICE(MARVELL, 0x6080), chip_608x },
+       { PCI_VDEVICE(MARVELL, 0x6081), chip_608x },
+
+       { PCI_VDEVICE(ADAPTEC2, 0x0241), chip_604x },
+
+       /* Adaptec 1430SA */
+       { PCI_VDEVICE(ADAPTEC2, 0x0243), chip_7042 },
+
+       /* Marvell 7042 support */
+       { PCI_VDEVICE(MARVELL, 0x7042), chip_7042 },
+
+       /* Highpoint RocketRAID PCIe series */
+       { PCI_VDEVICE(TTI, 0x2300), chip_7042 },
+       { PCI_VDEVICE(TTI, 0x2310), chip_7042 },
+
+       { }                     /* terminate list */
+};
 
 static struct pci_driver mv_pci_driver = {
        .name                   = DRV_NAME,
@@ -4315,6 +4314,7 @@ static struct pci_driver mv_pci_driver = {
 #endif
 
 };
+MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
 
 /**
  *      mv_print_info - Dump key info to kernel log for perusal.
@@ -4487,7 +4487,6 @@ static void __exit mv_exit(void)
 MODULE_AUTHOR("Brett Russ");
 MODULE_DESCRIPTION("SCSI low-level driver for Marvell SATA controllers");
 MODULE_LICENSE("GPL v2");
-MODULE_DEVICE_TABLE(pci, mv_pci_tbl);
 MODULE_VERSION(DRV_VERSION);
 MODULE_ALIAS("platform:" DRV_NAME);
 
index b51d7a9d0d90ce0a6c72fe841aad222708c046e1..a482741eb181ffca923519ba7d8ab5e73da1e176 100644 (file)
@@ -957,8 +957,7 @@ static void pdc20621_get_from_dimm(struct ata_host *host, void *psource,
 
        offset -= (idx * window_size);
        idx++;
-       dist = ((long) (window_size - (offset + size))) >= 0 ? size :
-               (long) (window_size - offset);
+       dist = min(size, window_size - offset);
        memcpy_fromio(psource, dimm_mmio + offset / 4, dist);
 
        psource += dist;
@@ -1005,8 +1004,7 @@ static void pdc20621_put_to_dimm(struct ata_host *host, void *psource,
        readl(mmio + PDC_DIMM_WINDOW_CTLR);
        offset -= (idx * window_size);
        idx++;
-       dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? size :
-               (long) (window_size - offset);
+       dist = min(size, window_size - offset);
        memcpy_toio(dimm_mmio + offset / 4, psource, dist);
        writel(0x01, mmio + PDC_GENERAL_CTLR);
        readl(mmio + PDC_GENERAL_CTLR);
index b40b32fa7f1c38c5d12931ee7b06e5b8ab144d77..19cfc342fc7bbb67af65cb4de10e074622a991a4 100644 (file)
@@ -826,11 +826,15 @@ EXPORT_SYMBOL_GPL(qca_uart_setup);
 
 int qca_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
 {
+       bdaddr_t bdaddr_swapped;
        struct sk_buff *skb;
        int err;
 
-       skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6, bdaddr,
-                               HCI_EV_VENDOR, HCI_INIT_TIMEOUT);
+       baswap(&bdaddr_swapped, bdaddr);
+
+       skb = __hci_cmd_sync_ev(hdev, EDL_WRITE_BD_ADDR_OPCODE, 6,
+                               &bdaddr_swapped, HCI_EV_VENDOR,
+                               HCI_INIT_TIMEOUT);
        if (IS_ERR(skb)) {
                err = PTR_ERR(skb);
                bt_dev_err(hdev, "QCA Change address cmd failed (%d)", err);
index 8a60ad7acd7052b7faa6784ceaca1d417002e4a1..ecbc52eaf1010912b9024ddbc3c87aac4254e1e3 100644 (file)
@@ -7,7 +7,6 @@
  *
  *  Copyright (C) 2007 Texas Instruments, Inc.
  *  Copyright (c) 2010, 2012, 2018 The Linux Foundation. All rights reserved.
- *  Copyright (c) 2023 Qualcomm Innovation Center, Inc. All rights reserved.
  *
  *  Acknowledgements:
  *  This file is based on hci_ll.c, which was...
@@ -226,6 +225,7 @@ struct qca_serdev {
        struct qca_power *bt_power;
        u32 init_speed;
        u32 oper_speed;
+       bool bdaddr_property_broken;
        const char *firmware_name;
 };
 
@@ -1843,6 +1843,7 @@ static int qca_setup(struct hci_uart *hu)
        const char *firmware_name = qca_get_firmware_name(hu);
        int ret;
        struct qca_btsoc_version ver;
+       struct qca_serdev *qcadev;
        const char *soc_name;
 
        ret = qca_check_speeds(hu);
@@ -1904,16 +1905,11 @@ retry:
        case QCA_WCN6750:
        case QCA_WCN6855:
        case QCA_WCN7850:
+               set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
 
-               /* Set BDA quirk bit for reading BDA value from fwnode property
-                * only if that property exist in DT.
-                */
-               if (fwnode_property_present(dev_fwnode(hdev->dev.parent), "local-bd-address")) {
-                       set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
-                       bt_dev_info(hdev, "setting quirk bit to read BDA from fwnode later");
-               } else {
-                       bt_dev_dbg(hdev, "local-bd-address` is not present in the devicetree so not setting quirk bit for BDA");
-               }
+               qcadev = serdev_device_get_drvdata(hu->serdev);
+               if (qcadev->bdaddr_property_broken)
+                       set_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks);
 
                hci_set_aosp_capable(hdev);
 
@@ -2295,6 +2291,9 @@ static int qca_serdev_probe(struct serdev_device *serdev)
        if (!qcadev->oper_speed)
                BT_DBG("UART will pick default operating speed");
 
+       qcadev->bdaddr_property_broken = device_property_read_bool(&serdev->dev,
+                       "qcom,local-bd-address-broken");
+
        if (data)
                qcadev->btsoc_type = data->soc_type;
        else
index fa96356102510967cb30538375c9348d122c3910..d09c7d72836551ab510031179a9b95340cd3fb36 100644 (file)
@@ -728,6 +728,25 @@ static u32 line_event_id(int level)
                       GPIO_V2_LINE_EVENT_FALLING_EDGE;
 }
 
+static inline char *make_irq_label(const char *orig)
+{
+       char *new;
+
+       if (!orig)
+               return NULL;
+
+       new = kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
+       if (!new)
+               return ERR_PTR(-ENOMEM);
+
+       return new;
+}
+
+static inline void free_irq_label(const char *label)
+{
+       kfree(label);
+}
+
 #ifdef CONFIG_HTE
 
 static enum hte_return process_hw_ts_thread(void *p)
@@ -1015,6 +1034,7 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
 {
        unsigned long irqflags;
        int ret, level, irq;
+       char *label;
 
        /* try hardware */
        ret = gpiod_set_debounce(line->desc, debounce_period_us);
@@ -1037,11 +1057,17 @@ static int debounce_setup(struct line *line, unsigned int debounce_period_us)
                        if (irq < 0)
                                return -ENXIO;
 
+                       label = make_irq_label(line->req->label);
+                       if (IS_ERR(label))
+                               return -ENOMEM;
+
                        irqflags = IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING;
                        ret = request_irq(irq, debounce_irq_handler, irqflags,
-                                         line->req->label, line);
-                       if (ret)
+                                         label, line);
+                       if (ret) {
+                               free_irq_label(label);
                                return ret;
+                       }
                        line->irq = irq;
                } else {
                        ret = hte_edge_setup(line, GPIO_V2_LINE_FLAG_EDGE_BOTH);
@@ -1083,16 +1109,6 @@ static u32 gpio_v2_line_config_debounce_period(struct gpio_v2_line_config *lc,
        return 0;
 }
 
-static inline char *make_irq_label(const char *orig)
-{
-       return kstrdup_and_replace(orig, '/', ':', GFP_KERNEL);
-}
-
-static inline void free_irq_label(const char *label)
-{
-       kfree(label);
-}
-
 static void edge_detector_stop(struct line *line)
 {
        if (line->irq) {
@@ -1158,8 +1174,8 @@ static int edge_detector_setup(struct line *line,
        irqflags |= IRQF_ONESHOT;
 
        label = make_irq_label(line->req->label);
-       if (!label)
-               return -ENOMEM;
+       if (IS_ERR(label))
+               return PTR_ERR(label);
 
        /* Request a thread to read the events */
        ret = request_threaded_irq(irq, edge_irq_handler, edge_irq_thread,
@@ -2217,8 +2233,8 @@ static int lineevent_create(struct gpio_device *gdev, void __user *ip)
                goto out_free_le;
 
        label = make_irq_label(le->label);
-       if (!label) {
-               ret = -ENOMEM;
+       if (IS_ERR(label)) {
+               ret = PTR_ERR(label);
                goto out_free_le;
        }
 
index 59ccf9a3e1539c93e705139fb2c4a1f7db166001..94903fc1c1459f9fd26eba62628037492e202620 100644 (file)
@@ -1175,6 +1175,9 @@ struct gpio_device *gpio_device_find(const void *data,
 
        list_for_each_entry_srcu(gdev, &gpio_devices, list,
                                 srcu_read_lock_held(&gpio_devices_srcu)) {
+               if (!device_is_registered(&gdev->dev))
+                       continue;
+
                guard(srcu)(&gdev->srcu);
 
                gc = srcu_dereference(gdev->chip, &gdev->srcu);
index bd61e20770a5be20b8978be47a3ba2eaae0c3289..14a2a8473682b00a84e5a0e3907969e719fa5019 100644 (file)
@@ -52,7 +52,7 @@
  * @adapter: I2C adapter for the DDC bus
  * @offset: register offset
  * @buffer: buffer for return data
- * @size: sizo of the buffer
+ * @size: size of the buffer
  *
  * Reads @size bytes from the DP dual mode adaptor registers
  * starting at @offset.
@@ -116,7 +116,7 @@ EXPORT_SYMBOL(drm_dp_dual_mode_read);
  * @adapter: I2C adapter for the DDC bus
  * @offset: register offset
  * @buffer: buffer for write data
- * @size: sizo of the buffer
+ * @size: size of the buffer
  *
  * Writes @size bytes to the DP dual mode adaptor registers
  * starting at @offset.
index 7352bde299d54767fecb34232cb5941a01d6ea88..03bd3c7bd0dc2cf833decec93ce2186cf955a9bd 100644 (file)
@@ -582,7 +582,12 @@ int drm_gem_map_attach(struct dma_buf *dma_buf,
 {
        struct drm_gem_object *obj = dma_buf->priv;
 
-       if (!obj->funcs->get_sg_table)
+       /*
+        * drm_gem_map_dma_buf() requires obj->get_sg_table(), but drivers
+        * that implement their own ->map_dma_buf() do not.
+        */
+       if (dma_buf->ops->map_dma_buf == drm_gem_map_dma_buf &&
+           !obj->funcs->get_sg_table)
                return -ENOSYS;
 
        return drm_gem_pin(obj);
index 4c2f85632391a669c35012439094250e5e9c6dc5..fba73c38e23569fa521e387484b96eadfb988d80 100644 (file)
@@ -118,6 +118,7 @@ gt-y += \
        gt/intel_ggtt_fencing.o \
        gt/intel_gt.o \
        gt/intel_gt_buffer_pool.o \
+       gt/intel_gt_ccs_mode.o \
        gt/intel_gt_clock_utils.o \
        gt/intel_gt_debugfs.o \
        gt/intel_gt_engines_debugfs.o \
index ab2f52d21bad8bad22c184cce6aefac8b0ce5a29..8af9e6128277af050fb95b2902615c4ce9678592 100644 (file)
@@ -2709,15 +2709,6 @@ static void intel_set_pipe_src_size(const struct intel_crtc_state *crtc_state)
         */
        intel_de_write(dev_priv, PIPESRC(pipe),
                       PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1));
-
-       if (!crtc_state->enable_psr2_su_region_et)
-               return;
-
-       width = drm_rect_width(&crtc_state->psr2_su_area);
-       height = drm_rect_height(&crtc_state->psr2_su_area);
-
-       intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(pipe),
-                      PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1));
 }
 
 static bool intel_pipe_is_interlaced(const struct intel_crtc_state *crtc_state)
index fe42688137863ca8dc95057aa3fffe5ada026211..9b1bce2624b9ea1a8e45bcc5b76e4621703d073e 100644 (file)
@@ -47,6 +47,7 @@ struct drm_printer;
 #define HAS_DPT(i915)                  (DISPLAY_VER(i915) >= 13)
 #define HAS_DSB(i915)                  (DISPLAY_INFO(i915)->has_dsb)
 #define HAS_DSC(__i915)                        (DISPLAY_RUNTIME_INFO(__i915)->has_dsc)
+#define HAS_DSC_MST(__i915)            (DISPLAY_VER(__i915) >= 12 && HAS_DSC(__i915))
 #define HAS_FBC(i915)                  (DISPLAY_RUNTIME_INFO(i915)->fbc_mask != 0)
 #define HAS_FPGA_DBG_UNCLAIMED(i915)   (DISPLAY_INFO(i915)->has_fpga_dbg)
 #define HAS_FW_BLC(i915)               (DISPLAY_VER(i915) >= 3)
index 9104f18753b484fde2b439f85494fc77a3d27c87..bf3f942e19c3d38a314d2e5c5065dbb73b36682f 100644 (file)
@@ -1423,6 +1423,8 @@ struct intel_crtc_state {
 
        u32 psr2_man_track_ctl;
 
+       u32 pipe_srcsz_early_tpt;
+
        struct drm_rect psr2_su_area;
 
        /* Variable Refresh Rate state */
index f98ef4b42a448f57d5dfaf0459cba23a00946870..abd62bebc46d0e58d5bc78d8f4500ddcbc6098f1 100644 (file)
@@ -499,7 +499,7 @@ intel_dp_set_source_rates(struct intel_dp *intel_dp)
        /* The values must be in increasing order */
        static const int mtl_rates[] = {
                162000, 216000, 243000, 270000, 324000, 432000, 540000, 675000,
-               810000, 1000000, 1350000, 2000000,
+               810000, 1000000, 2000000,
        };
        static const int icl_rates[] = {
                162000, 216000, 270000, 324000, 432000, 540000, 648000, 810000,
@@ -1422,7 +1422,8 @@ static bool intel_dp_source_supports_fec(struct intel_dp *intel_dp,
        if (DISPLAY_VER(dev_priv) >= 12)
                return true;
 
-       if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A)
+       if (DISPLAY_VER(dev_priv) == 11 && encoder->port != PORT_A &&
+           !intel_crtc_has_type(pipe_config, INTEL_OUTPUT_DP_MST))
                return true;
 
        return false;
@@ -1917,8 +1918,9 @@ icl_dsc_compute_link_config(struct intel_dp *intel_dp,
        dsc_max_bpp = min(dsc_max_bpp, pipe_bpp - 1);
 
        for (i = 0; i < ARRAY_SIZE(valid_dsc_bpp); i++) {
-               if (valid_dsc_bpp[i] < dsc_min_bpp ||
-                   valid_dsc_bpp[i] > dsc_max_bpp)
+               if (valid_dsc_bpp[i] < dsc_min_bpp)
+                       continue;
+               if (valid_dsc_bpp[i] > dsc_max_bpp)
                        break;
 
                ret = dsc_compute_link_config(intel_dp,
@@ -6557,6 +6559,7 @@ intel_dp_init_connector(struct intel_digital_port *dig_port,
                intel_connector->get_hw_state = intel_ddi_connector_get_hw_state;
        else
                intel_connector->get_hw_state = intel_connector_get_hw_state;
+       intel_connector->sync_state = intel_dp_connector_sync_state;
 
        if (!intel_edp_init_connector(intel_dp, intel_connector)) {
                intel_dp_aux_fini(intel_dp);
index 53aec023ce92fae91e653adf9278b6a81eae3040..b651c990af85f70b17510effdfdba35235dbf51f 100644 (file)
@@ -1355,7 +1355,7 @@ intel_dp_mst_mode_valid_ctx(struct drm_connector *connector,
                return 0;
        }
 
-       if (DISPLAY_VER(dev_priv) >= 10 &&
+       if (HAS_DSC_MST(dev_priv) &&
            drm_dp_sink_supports_dsc(intel_connector->dp.dsc_dpcd)) {
                /*
                 * TBD pass the connector BPC,
index 6927785fd6ff2fed2406a6ca1889cdf455f548e7..b6e539f1342c29ad97f5f46de8b51d9a358375bb 100644 (file)
@@ -1994,6 +1994,7 @@ static void psr_force_hw_tracking_exit(struct intel_dp *intel_dp)
 
 void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_state)
 {
+       struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
        struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
        enum transcoder cpu_transcoder = crtc_state->cpu_transcoder;
        struct intel_encoder *encoder;
@@ -2013,6 +2014,12 @@ void intel_psr2_program_trans_man_trk_ctl(const struct intel_crtc_state *crtc_st
 
        intel_de_write(dev_priv, PSR2_MAN_TRK_CTL(cpu_transcoder),
                       crtc_state->psr2_man_track_ctl);
+
+       if (!crtc_state->enable_psr2_su_region_et)
+               return;
+
+       intel_de_write(dev_priv, PIPE_SRCSZ_ERLY_TPT(crtc->pipe),
+                      crtc_state->pipe_srcsz_early_tpt);
 }
 
 static void psr2_man_trk_ctl_calc(struct intel_crtc_state *crtc_state,
@@ -2051,6 +2058,20 @@ exit:
        crtc_state->psr2_man_track_ctl = val;
 }
 
+static u32 psr2_pipe_srcsz_early_tpt_calc(struct intel_crtc_state *crtc_state,
+                                         bool full_update)
+{
+       int width, height;
+
+       if (!crtc_state->enable_psr2_su_region_et || full_update)
+               return 0;
+
+       width = drm_rect_width(&crtc_state->psr2_su_area);
+       height = drm_rect_height(&crtc_state->psr2_su_area);
+
+       return PIPESRC_WIDTH(width - 1) | PIPESRC_HEIGHT(height - 1);
+}
+
 static void clip_area_update(struct drm_rect *overlap_damage_area,
                             struct drm_rect *damage_area,
                             struct drm_rect *pipe_src)
@@ -2095,21 +2116,36 @@ static void intel_psr2_sel_fetch_pipe_alignment(struct intel_crtc_state *crtc_st
  * cursor fully when cursor is in SU area.
  */
 static void
-intel_psr2_sel_fetch_et_alignment(struct intel_crtc_state *crtc_state,
-                                 struct intel_plane_state *cursor_state)
+intel_psr2_sel_fetch_et_alignment(struct intel_atomic_state *state,
+                                 struct intel_crtc *crtc)
 {
-       struct drm_rect inter;
+       struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
+       struct intel_plane_state *new_plane_state;
+       struct intel_plane *plane;
+       int i;
 
-       if (!crtc_state->enable_psr2_su_region_et ||
-           !cursor_state->uapi.visible)
+       if (!crtc_state->enable_psr2_su_region_et)
                return;
 
-       inter = crtc_state->psr2_su_area;
-       if (!drm_rect_intersect(&inter, &cursor_state->uapi.dst))
-               return;
+       for_each_new_intel_plane_in_state(state, plane, new_plane_state, i) {
+               struct drm_rect inter;
 
-       clip_area_update(&crtc_state->psr2_su_area, &cursor_state->uapi.dst,
-                        &crtc_state->pipe_src);
+               if (new_plane_state->uapi.crtc != crtc_state->uapi.crtc)
+                       continue;
+
+               if (plane->id != PLANE_CURSOR)
+                       continue;
+
+               if (!new_plane_state->uapi.visible)
+                       continue;
+
+               inter = crtc_state->psr2_su_area;
+               if (!drm_rect_intersect(&inter, &new_plane_state->uapi.dst))
+                       continue;
+
+               clip_area_update(&crtc_state->psr2_su_area, &new_plane_state->uapi.dst,
+                                &crtc_state->pipe_src);
+       }
 }
 
 /*
@@ -2152,8 +2188,7 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 {
        struct drm_i915_private *dev_priv = to_i915(state->base.dev);
        struct intel_crtc_state *crtc_state = intel_atomic_get_new_crtc_state(state, crtc);
-       struct intel_plane_state *new_plane_state, *old_plane_state,
-               *cursor_plane_state = NULL;
+       struct intel_plane_state *new_plane_state, *old_plane_state;
        struct intel_plane *plane;
        bool full_update = false;
        int i, ret;
@@ -2238,13 +2273,6 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
                damaged_area.x2 += new_plane_state->uapi.dst.x1 - src.x1;
 
                clip_area_update(&crtc_state->psr2_su_area, &damaged_area, &crtc_state->pipe_src);
-
-               /*
-                * Cursor plane new state is stored to adjust su area to cover
-                * cursor are fully.
-                */
-               if (plane->id == PLANE_CURSOR)
-                       cursor_plane_state = new_plane_state;
        }
 
        /*
@@ -2273,9 +2301,13 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
        if (ret)
                return ret;
 
-       /* Adjust su area to cover cursor fully as necessary */
-       if (cursor_plane_state)
-               intel_psr2_sel_fetch_et_alignment(crtc_state, cursor_plane_state);
+       /*
+        * Adjust su area to cover cursor fully as necessary (early
+        * transport). This needs to be done after
+        * drm_atomic_add_affected_planes to ensure visible cursor is added into
+        * affected planes even when cursor is not updated by itself.
+        */
+       intel_psr2_sel_fetch_et_alignment(state, crtc);
 
        intel_psr2_sel_fetch_pipe_alignment(crtc_state);
 
@@ -2338,6 +2370,8 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state *state,
 
 skip_sel_fetch_set_loop:
        psr2_man_trk_ctl_calc(crtc_state, full_update);
+       crtc_state->pipe_srcsz_early_tpt =
+               psr2_pipe_srcsz_early_tpt_calc(crtc_state, full_update);
        return 0;
 }
 
index fa46d2308b0ed3b0d6bd5054f7ffbf4f5701128a..81bf2216371be6a5e16fe15a1bc23ef6c0b5b46c 100644 (file)
@@ -961,6 +961,9 @@ static int gen8_init_rsvd(struct i915_address_space *vm)
        struct i915_vma *vma;
        int ret;
 
+       if (!intel_gt_needs_wa_16018031267(vm->gt))
+               return 0;
+
        /* The memory will be used only by GPU. */
        obj = i915_gem_object_create_lmem(i915, PAGE_SIZE,
                                          I915_BO_ALLOC_VOLATILE |
index 1ade568ffbfa43409129228881abe60d965e8d10..7a6dc371c384eb3d1f2639d5a767072a3bc554f4 100644 (file)
@@ -908,6 +908,23 @@ static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
                info->engine_mask &= ~BIT(GSC0);
        }
 
+       /*
+        * Do not create the command streamer for CCS slices beyond the first.
+        * All the workload submitted to the first engine will be shared among
+        * all the slices.
+        *
+        * Once the user will be allowed to customize the CCS mode, then this
+        * check needs to be removed.
+        */
+       if (IS_DG2(gt->i915)) {
+               u8 first_ccs = __ffs(CCS_MASK(gt));
+
+               /* Mask off all the CCS engine */
+               info->engine_mask &= ~GENMASK(CCS3, CCS0);
+               /* Put back in the first CCS engine */
+               info->engine_mask |= BIT(_CCS(first_ccs));
+       }
+
        return info->engine_mask;
 }
 
index a425db5ed3a22c38af996ce2183d6fa030ed60b2..6a2c2718bcc38e645903031ce00cd667c1ee5411 100644 (file)
@@ -1024,6 +1024,12 @@ enum i915_map_type intel_gt_coherent_map_type(struct intel_gt *gt,
                return I915_MAP_WC;
 }
 
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt)
+{
+       /* Wa_16018031267, Wa_16018063123 */
+       return IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 55), IP_VER(12, 71));
+}
+
 bool intel_gt_needs_wa_22016122933(struct intel_gt *gt)
 {
        return MEDIA_VER_FULL(gt->i915) == IP_VER(13, 0) && gt->type == GT_MEDIA;
index 608f5c87292857c6b2777bbd809c5bd87a48238c..003eb93b826fd06fa122650b99b7cdbb09fe3161 100644 (file)
@@ -82,17 +82,18 @@ struct drm_printer;
                  ##__VA_ARGS__);                                       \
 } while (0)
 
-#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
-       IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 55), IP_VER(12, 71)) && \
-       engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
-
 static inline bool gt_is_root(struct intel_gt *gt)
 {
        return !gt->info.id;
 }
 
+bool intel_gt_needs_wa_16018031267(struct intel_gt *gt);
 bool intel_gt_needs_wa_22016122933(struct intel_gt *gt);
 
+#define NEEDS_FASTCOLOR_BLT_WABB(engine) ( \
+       intel_gt_needs_wa_16018031267(engine->gt) && \
+       engine->class == COPY_ENGINE_CLASS && engine->instance == 0)
+
 static inline struct intel_gt *uc_to_gt(struct intel_uc *uc)
 {
        return container_of(uc, struct intel_gt, uc);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.c
new file mode 100644 (file)
index 0000000..044219c
--- /dev/null
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright Â© 2024 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
+#include "intel_gt_regs.h"
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt)
+{
+       int cslice;
+       u32 mode = 0;
+       int first_ccs = __ffs(CCS_MASK(gt));
+
+       if (!IS_DG2(gt->i915))
+               return;
+
+       /* Build the value for the fixed CCS load balancing */
+       for (cslice = 0; cslice < I915_MAX_CCS; cslice++) {
+               if (CCS_MASK(gt) & BIT(cslice))
+                       /*
+                        * If available, assign the cslice
+                        * to the first available engine...
+                        */
+                       mode |= XEHP_CCS_MODE_CSLICE(cslice, first_ccs);
+
+               else
+                       /*
+                        * ... otherwise, mark the cslice as
+                        * unavailable if no CCS dispatches here
+                        */
+                       mode |= XEHP_CCS_MODE_CSLICE(cslice,
+                                                    XEHP_CCS_MODE_CSLICE_MASK);
+       }
+
+       intel_uncore_write(gt->uncore, XEHP_CCS_MODE, mode);
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h b/drivers/gpu/drm/i915/gt/intel_gt_ccs_mode.h
new file mode 100644 (file)
index 0000000..9e5549c
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright Â© 2024 Intel Corporation
+ */
+
+#ifndef __INTEL_GT_CCS_MODE_H__
+#define __INTEL_GT_CCS_MODE_H__
+
+struct intel_gt;
+
+void intel_gt_apply_ccs_mode(struct intel_gt *gt);
+
+#endif /* __INTEL_GT_CCS_MODE_H__ */
index 50962cfd1353ae4673b27a9bb2437d47633b5651..743fe35667227451436205f9e44514df1c4e809b 100644 (file)
 #define   ECOBITS_PPGTT_CACHE4B                        (0 << 8)
 
 #define GEN12_RCU_MODE                         _MMIO(0x14800)
+#define   XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE   REG_BIT(1)
 #define   GEN12_RCU_MODE_CCS_ENABLE            REG_BIT(0)
 
+#define XEHP_CCS_MODE                          _MMIO(0x14804)
+#define   XEHP_CCS_MODE_CSLICE_MASK            REG_GENMASK(2, 0) /* CCS0-3 + rsvd */
+#define   XEHP_CCS_MODE_CSLICE_WIDTH           ilog2(XEHP_CCS_MODE_CSLICE_MASK + 1)
+#define   XEHP_CCS_MODE_CSLICE(cslice, ccs)    (ccs << (cslice * XEHP_CCS_MODE_CSLICE_WIDTH))
+
 #define CHV_FUSE_GT                            _MMIO(VLV_GUNIT_BASE + 0x2168)
 #define   CHV_FGT_DISABLE_SS0                  (1 << 10)
 #define   CHV_FGT_DISABLE_SS1                  (1 << 11)
index 25413809b9dc99734409210259a9f51f1fffad88..6ec3582c97357780f823865cf0a9a9581b50d288 100644 (file)
@@ -10,6 +10,7 @@
 #include "intel_engine_regs.h"
 #include "intel_gpu_commands.h"
 #include "intel_gt.h"
+#include "intel_gt_ccs_mode.h"
 #include "intel_gt_mcr.h"
 #include "intel_gt_print.h"
 #include "intel_gt_regs.h"
@@ -51,7 +52,8 @@
  *   registers belonging to BCS, VCS or VECS should be implemented in
  *   xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
  *   engine's MMIO range but that are part of of the common RCS/CCS reset domain
- *   should be implemented in general_render_compute_wa_init().
+ *   should be implemented in general_render_compute_wa_init(). The settings
+ *   about the CCS load balancing should be added in ccs_engine_wa_mode().
  *
  * - GT workarounds: the list of these WAs is applied whenever these registers
  *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -2854,6 +2856,28 @@ add_render_compute_tuning_settings(struct intel_gt *gt,
                wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
 }
 
+static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
+{
+       struct intel_gt *gt = engine->gt;
+
+       if (!IS_DG2(gt->i915))
+               return;
+
+       /*
+        * Wa_14019159160: This workaround, along with others, leads to
+        * significant challenges in utilizing load balancing among the
+        * CCS slices. Consequently, an architectural decision has been
+        * made to completely disable automatic CCS load balancing.
+        */
+       wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);
+
+       /*
+        * After having disabled automatic load balancing we need to
+        * assign all slices to a single CCS. We will call it CCS mode 1
+        */
+       intel_gt_apply_ccs_mode(gt);
+}
+
 /*
  * The workarounds in this function apply to shared registers in
  * the general render reset domain that aren't tied to a
@@ -3004,8 +3028,10 @@ engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal
         * to a single RCS/CCS engine's workaround list since
         * they're reset as part of the general render domain reset.
         */
-       if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE)
+       if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
                general_render_compute_wa_init(engine, wal);
+               ccs_engine_wa_mode(engine, wal);
+       }
 
        if (engine->class == COMPUTE_CLASS)
                ccs_engine_wa_init(engine, wal);
index 0a0a11dc9ec03eeba855f47ca57c1ad1c5669f54..ee02cd833c5e4345abdc3fb83968769999ac4340 100644 (file)
@@ -812,15 +812,15 @@ op_remap(struct drm_gpuva_op_remap *r,
        struct drm_gpuva_op_unmap *u = r->unmap;
        struct nouveau_uvma *uvma = uvma_from_va(u->va);
        u64 addr = uvma->va.va.addr;
-       u64 range = uvma->va.va.range;
+       u64 end = uvma->va.va.addr + uvma->va.va.range;
 
        if (r->prev)
                addr = r->prev->va.addr + r->prev->va.range;
 
        if (r->next)
-               range = r->next->va.addr - addr;
+               end = r->next->va.addr;
 
-       op_unmap_range(u, addr, range);
+       op_unmap_range(u, addr, end - addr);
 }
 
 static int
index 986e8d547c94246a5f7bd058e6ddf555ffc651a4..060c74a80eb14b916db3c441e44b137dd15b7336 100644 (file)
@@ -420,7 +420,7 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
                        return ret;
        } else {
                ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb,
-                                     &args, sizeof(args));;
+                                     &args, sizeof(args));
                if (ret)
                        return ret;
        }
index 9063ce2546422fd93eb0c0b847cab68aac0ee753..fd8e44992184fa2e63a11e1810ea8e79f9e929a4 100644 (file)
@@ -441,19 +441,19 @@ void panfrost_gpu_power_off(struct panfrost_device *pfdev)
 
        gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present);
        ret = readl_relaxed_poll_timeout(pfdev->iomem + SHADER_PWRTRANS_LO,
-                                        val, !val, 1, 1000);
+                                        val, !val, 1, 2000);
        if (ret)
                dev_err(pfdev->dev, "shader power transition timeout");
 
        gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
        ret = readl_relaxed_poll_timeout(pfdev->iomem + TILER_PWRTRANS_LO,
-                                        val, !val, 1, 1000);
+                                        val, !val, 1, 2000);
        if (ret)
                dev_err(pfdev->dev, "tiler power transition timeout");
 
        gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);
        ret = readl_poll_timeout(pfdev->iomem + L2_PWRTRANS_LO,
-                                val, !val, 0, 1000);
+                                val, !val, 0, 2000);
        if (ret)
                dev_err(pfdev->dev, "l2 power transition timeout");
 }
index ca85e81fdb44383ffdafdb48a98a843cb1884b71..d32ff3857e65838d460d507440d576601fa02f03 100644 (file)
@@ -193,6 +193,9 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
 {
        struct xe_device *xe = to_xe_device(dev);
 
+       if (xe->preempt_fence_wq)
+               destroy_workqueue(xe->preempt_fence_wq);
+
        if (xe->ordered_wq)
                destroy_workqueue(xe->ordered_wq);
 
@@ -258,9 +261,15 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
        INIT_LIST_HEAD(&xe->pinned.external_vram);
        INIT_LIST_HEAD(&xe->pinned.evicted);
 
+       xe->preempt_fence_wq = alloc_ordered_workqueue("xe-preempt-fence-wq", 0);
        xe->ordered_wq = alloc_ordered_workqueue("xe-ordered-wq", 0);
        xe->unordered_wq = alloc_workqueue("xe-unordered-wq", 0, 0);
-       if (!xe->ordered_wq || !xe->unordered_wq) {
+       if (!xe->ordered_wq || !xe->unordered_wq ||
+           !xe->preempt_fence_wq) {
+               /*
+                * Cleanup done in xe_device_destroy via
+                * drmm_add_action_or_reset register above
+                */
                drm_err(&xe->drm, "Failed to allocate xe workqueues\n");
                err = -ENOMEM;
                goto err;
index 9785eef2e5a4e6566c452e1fa8c45c447fe00b76..8e3a222b41cf0a4dda7286b10566e6def0d97ad4 100644 (file)
@@ -363,6 +363,9 @@ struct xe_device {
        /** @ufence_wq: user fence wait queue */
        wait_queue_head_t ufence_wq;
 
+       /** @preempt_fence_wq: used to serialize preempt fences */
+       struct workqueue_struct *preempt_fence_wq;
+
        /** @ordered_wq: used to serialize compute mode resume */
        struct workqueue_struct *ordered_wq;
 
index 826c8b389672502dfebd6e89c6c1997bf8f0c9a2..cc5e0f75de3c7350770323aeea9570ddd89d48bb 100644 (file)
  *     Unlock all
  */
 
+/*
+ * Add validation and rebinding to the drm_exec locking loop, since both can
+ * trigger eviction which may require sleeping dma_resv locks.
+ */
 static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
 {
        struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
-       struct drm_gem_object *obj;
-       unsigned long index;
-       int num_fences;
-       int ret;
-
-       ret = drm_gpuvm_validate(vm_exec->vm, &vm_exec->exec);
-       if (ret)
-               return ret;
-
-       /*
-        * 1 fence slot for the final submit, and 1 more for every per-tile for
-        * GPU bind and 1 extra for CPU bind. Note that there are potentially
-        * many vma per object/dma-resv, however the fence slot will just be
-        * re-used, since they are largely the same timeline and the seqno
-        * should be in order. In the case of CPU bind there is dummy fence used
-        * for all CPU binds, so no need to have a per-tile slot for that.
-        */
-       num_fences = 1 + 1 + vm->xe->info.tile_count;
 
-       /*
-        * We don't know upfront exactly how many fence slots we will need at
-        * the start of the exec, since the TTM bo_validate above can consume
-        * numerous fence slots. Also due to how the dma_resv_reserve_fences()
-        * works it only ensures that at least that many fence slots are
-        * available i.e if there are already 10 slots available and we reserve
-        * two more, it can just noop without reserving anything.  With this it
-        * is quite possible that TTM steals some of the fence slots and then
-        * when it comes time to do the vma binding and final exec stage we are
-        * lacking enough fence slots, leading to some nasty BUG_ON() when
-        * adding the fences. Hence just add our own fences here, after the
-        * validate stage.
-        */
-       drm_exec_for_each_locked_object(&vm_exec->exec, index, obj) {
-               ret = dma_resv_reserve_fences(obj->resv, num_fences);
-               if (ret)
-                       return ret;
-       }
-
-       return 0;
+       /* The fence slot added here is intended for the exec sched job. */
+       return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
 }
 
 int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -152,7 +120,6 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
        struct drm_exec *exec = &vm_exec.exec;
        u32 i, num_syncs = 0, num_ufence = 0;
        struct xe_sched_job *job;
-       struct dma_fence *rebind_fence;
        struct xe_vm *vm;
        bool write_locked, skip_retry = false;
        ktime_t end = 0;
@@ -290,39 +257,7 @@ retry:
                goto err_exec;
        }
 
-       /*
-        * Rebind any invalidated userptr or evicted BOs in the VM, non-compute
-        * VM mode only.
-        */
-       rebind_fence = xe_vm_rebind(vm, false);
-       if (IS_ERR(rebind_fence)) {
-               err = PTR_ERR(rebind_fence);
-               goto err_put_job;
-       }
-
-       /*
-        * We store the rebind_fence in the VM so subsequent execs don't get
-        * scheduled before the rebinds of userptrs / evicted BOs is complete.
-        */
-       if (rebind_fence) {
-               dma_fence_put(vm->rebind_fence);
-               vm->rebind_fence = rebind_fence;
-       }
-       if (vm->rebind_fence) {
-               if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-                            &vm->rebind_fence->flags)) {
-                       dma_fence_put(vm->rebind_fence);
-                       vm->rebind_fence = NULL;
-               } else {
-                       dma_fence_get(vm->rebind_fence);
-                       err = drm_sched_job_add_dependency(&job->drm,
-                                                          vm->rebind_fence);
-                       if (err)
-                               goto err_put_job;
-               }
-       }
-
-       /* Wait behind munmap style rebinds */
+       /* Wait behind rebinds */
        if (!xe_vm_in_lr_mode(vm)) {
                err = drm_sched_job_add_resv_dependencies(&job->drm,
                                                          xe_vm_resv(vm),
index 62b3d9d1d7cdd4f2d65c55db414a00b7bd7fbd06..462b331950320c0e49901fb09c32a8cdcffc1745 100644 (file)
@@ -148,6 +148,11 @@ struct xe_exec_queue {
        const struct xe_ring_ops *ring_ops;
        /** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
        struct drm_sched_entity *entity;
+       /**
+        * @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
+        * Protected by @vm's resv. Unused if @vm == NULL.
+        */
+       u64 tlb_flush_seqno;
        /** @lrc: logical ring context for this exec queue */
        struct xe_lrc lrc[];
 };
index 241c294270d9167f25d1898f8f590c7aabb06ca0..fa9e9853c53ba605e0e35870bed69e7d09d25934 100644 (file)
@@ -100,10 +100,9 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
 {
        struct xe_bo *bo = xe_vma_bo(vma);
        struct xe_vm *vm = xe_vma_vm(vma);
-       unsigned int num_shared = 2; /* slots for bind + move */
        int err;
 
-       err = xe_vm_prepare_vma(exec, vma, num_shared);
+       err = xe_vm_lock_vma(exec, vma);
        if (err)
                return err;
 
index f03e077f81a04fcb9344f8c634856acab516c6f1..e598a4363d0190504d9ca8d826d7d996f0d2dfaf 100644 (file)
@@ -61,7 +61,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
        INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
        spin_lock_init(&gt->tlb_invalidation.pending_lock);
        spin_lock_init(&gt->tlb_invalidation.lock);
-       gt->tlb_invalidation.fence_context = dma_fence_context_alloc(1);
        INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
                          xe_gt_tlb_fence_timeout);
 
index 70c615dd14986599324a2fb68f766889761c7eb1..07b2f724ec45685feaa4b5ab86b6f2011f65198e 100644 (file)
@@ -177,13 +177,6 @@ struct xe_gt {
                 * xe_gt_tlb_fence_timeout after the timeut interval is over.
                 */
                struct delayed_work fence_tdr;
-               /** @tlb_invalidation.fence_context: context for TLB invalidation fences */
-               u64 fence_context;
-               /**
-                * @tlb_invalidation.fence_seqno: seqno to TLB invalidation fences, protected by
-                * tlb_invalidation.lock
-                */
-               u32 fence_seqno;
                /** @tlb_invalidation.lock: protects TLB invalidation fences */
                spinlock_t lock;
        } tlb_invalidation;
index 7bce2a332603c086bf4bed63c212cdff311f6bbf..7d50c6e89d8e7dc0ba718b9439ef86858e1f3992 100644 (file)
@@ -49,7 +49,7 @@ static bool preempt_fence_enable_signaling(struct dma_fence *fence)
        struct xe_exec_queue *q = pfence->q;
 
        pfence->error = q->ops->suspend(q);
-       queue_work(system_unbound_wq, &pfence->preempt_work);
+       queue_work(q->vm->xe->preempt_fence_wq, &pfence->preempt_work);
        return true;
 }
 
index 7f54bc3e389d58f8023f3a1092aa47d3e852a16b..4efc8c1a3d7a99e00107aeb88c803db26cb62881 100644 (file)
@@ -1135,8 +1135,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
        spin_lock_irq(&gt->tlb_invalidation.lock);
        dma_fence_init(&ifence->base.base, &invalidation_fence_ops,
                       &gt->tlb_invalidation.lock,
-                      gt->tlb_invalidation.fence_context,
-                      ++gt->tlb_invalidation.fence_seqno);
+                      dma_fence_context_alloc(1), 1);
        spin_unlock_irq(&gt->tlb_invalidation.lock);
 
        INIT_LIST_HEAD(&ifence->base.link);
@@ -1236,6 +1235,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
        err = xe_pt_prepare_bind(tile, vma, entries, &num_entries);
        if (err)
                goto err;
+
+       err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+       if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+               err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+       if (err)
+               goto err;
+
        xe_tile_assert(tile, num_entries <= ARRAY_SIZE(entries));
 
        xe_vm_dbg_print_entries(tile_to_xe(tile), entries, num_entries);
@@ -1254,11 +1260,13 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
         * non-faulting LR, in particular on user-space batch buffer chaining,
         * it needs to be done here.
         */
-       if ((rebind && !xe_vm_in_lr_mode(vm) && !vm->batch_invalidate_tlb) ||
-           (!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
+       if ((!rebind && xe_vm_has_scratch(vm) && xe_vm_in_preempt_fence_mode(vm))) {
                ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
                if (!ifence)
                        return ERR_PTR(-ENOMEM);
+       } else if (rebind && !xe_vm_in_lr_mode(vm)) {
+               /* We bump also if batch_invalidate_tlb is true */
+               vm->tlb_flush_seqno++;
        }
 
        rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
@@ -1297,7 +1305,7 @@ __xe_pt_bind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queue
                }
 
                /* add shared fence now for pagetable delayed destroy */
-               dma_resv_add_fence(xe_vm_resv(vm), fence, !rebind &&
+               dma_resv_add_fence(xe_vm_resv(vm), fence, rebind ||
                                   last_munmap_rebind ?
                                   DMA_RESV_USAGE_KERNEL :
                                   DMA_RESV_USAGE_BOOKKEEP);
@@ -1576,6 +1584,7 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
        struct dma_fence *fence = NULL;
        struct invalidation_fence *ifence;
        struct xe_range_fence *rfence;
+       int err;
 
        LLIST_HEAD(deferred);
 
@@ -1593,6 +1602,12 @@ __xe_pt_unbind_vma(struct xe_tile *tile, struct xe_vma *vma, struct xe_exec_queu
        xe_pt_calc_rfence_interval(vma, &unbind_pt_update, entries,
                                   num_entries);
 
+       err = dma_resv_reserve_fences(xe_vm_resv(vm), 1);
+       if (!err && !xe_vma_has_no_bo(vma) && !xe_vma_bo(vma)->vm)
+               err = dma_resv_reserve_fences(xe_vma_bo(vma)->ttm.base.resv, 1);
+       if (err)
+               return ERR_PTR(err);
+
        ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
        if (!ifence)
                return ERR_PTR(-ENOMEM);
index c4edffcd4a320666d576d950ab15dc614545a053..5b2b37b598130ac464a2c344bad52b731e778e28 100644 (file)
@@ -219,10 +219,9 @@ static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc
 {
        u32 dw[MAX_JOB_SIZE_DW], i = 0;
        u32 ppgtt_flag = get_ppgtt_flag(job);
-       struct xe_vm *vm = job->q->vm;
        struct xe_gt *gt = job->q->gt;
 
-       if (vm && vm->batch_invalidate_tlb) {
+       if (job->ring_ops_flush_tlb) {
                dw[i++] = preparser_disable(true);
                i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                        seqno, true, dw, i);
@@ -270,7 +269,6 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
        struct xe_gt *gt = job->q->gt;
        struct xe_device *xe = gt_to_xe(gt);
        bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
-       struct xe_vm *vm = job->q->vm;
 
        dw[i++] = preparser_disable(true);
 
@@ -282,13 +280,13 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
                        i = emit_aux_table_inv(gt, VE0_AUX_INV, dw, i);
        }
 
-       if (vm && vm->batch_invalidate_tlb)
+       if (job->ring_ops_flush_tlb)
                i = emit_flush_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                        seqno, true, dw, i);
 
        dw[i++] = preparser_disable(false);
 
-       if (!vm || !vm->batch_invalidate_tlb)
+       if (!job->ring_ops_flush_tlb)
                i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
                                        seqno, dw, i);
 
@@ -317,7 +315,6 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
        struct xe_gt *gt = job->q->gt;
        struct xe_device *xe = gt_to_xe(gt);
        bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
-       struct xe_vm *vm = job->q->vm;
        u32 mask_flags = 0;
 
        dw[i++] = preparser_disable(true);
@@ -327,7 +324,7 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
                mask_flags = PIPE_CONTROL_3D_ENGINE_FLAGS;
 
        /* See __xe_pt_bind_vma() for a discussion on TLB invalidations. */
-       i = emit_pipe_invalidate(mask_flags, vm && vm->batch_invalidate_tlb, dw, i);
+       i = emit_pipe_invalidate(mask_flags, job->ring_ops_flush_tlb, dw, i);
 
        /* hsdes: 1809175790 */
        if (has_aux_ccs(xe))
index 8151ddafb940756d87dbca45e6d3407354535ce4..b0c7fa4693cfe4a999b93b3878cb72c6150ebcbd 100644 (file)
@@ -250,6 +250,16 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
 
 void xe_sched_job_arm(struct xe_sched_job *job)
 {
+       struct xe_exec_queue *q = job->q;
+       struct xe_vm *vm = q->vm;
+
+       if (vm && !xe_sched_job_is_migration(q) && !xe_vm_in_lr_mode(vm) &&
+           (vm->batch_invalidate_tlb || vm->tlb_flush_seqno != q->tlb_flush_seqno)) {
+               xe_vm_assert_held(vm);
+               q->tlb_flush_seqno = vm->tlb_flush_seqno;
+               job->ring_ops_flush_tlb = true;
+       }
+
        drm_sched_job_arm(&job->drm);
 }
 
index b1d83da50a53da59b6d72af1bbd21c8d98ca3517..5e12724219fdd485f2b770bd4b31e78aa2ab42af 100644 (file)
@@ -39,6 +39,8 @@ struct xe_sched_job {
        } user_fence;
        /** @migrate_flush_flags: Additional flush flags for migration jobs */
        u32 migrate_flush_flags;
+       /** @ring_ops_flush_tlb: The ring ops need to flush TLB before payload. */
+       bool ring_ops_flush_tlb;
        /** @batch_addr: batch buffer address of job */
        u64 batch_addr[];
 };
index f88faef4142bde018f336d33d3e2eed726a4bc29..62d1ef8867a84351ae7444d63113d8867dfbb0c5 100644 (file)
@@ -482,17 +482,53 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
        return 0;
 }
 
+/**
+ * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas
+ * @vm: The vm for which we are rebinding.
+ * @exec: The struct drm_exec with the locked GEM objects.
+ * @num_fences: The number of fences to reserve for the operation, not
+ * including rebinds and validations.
+ *
+ * Validates all evicted gem objects and rebinds their vmas. Note that
+ * rebindings may cause evictions and hence the validation-rebind
+ * sequence is rerun until there are no more objects to validate.
+ *
+ * Return: 0 on success, negative error code on error. In particular,
+ * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if
+ * the drm_exec transaction needs to be restarted.
+ */
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+                         unsigned int num_fences)
+{
+       struct drm_gem_object *obj;
+       unsigned long index;
+       int ret;
+
+       do {
+               ret = drm_gpuvm_validate(&vm->gpuvm, exec);
+               if (ret)
+                       return ret;
+
+               ret = xe_vm_rebind(vm, false);
+               if (ret)
+                       return ret;
+       } while (!list_empty(&vm->gpuvm.evict.list));
+
+       drm_exec_for_each_locked_object(exec, index, obj) {
+               ret = dma_resv_reserve_fences(obj->resv, num_fences);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
                                 bool *done)
 {
        int err;
 
-       /*
-        * 1 fence for each preempt fence plus a fence for each tile from a
-        * possible rebind
-        */
-       err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, vm->preempt.num_exec_queues +
-                                  vm->xe->info.tile_count);
+       err = drm_gpuvm_prepare_vm(&vm->gpuvm, exec, 0);
        if (err)
                return err;
 
@@ -507,7 +543,7 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
                return 0;
        }
 
-       err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, vm->preempt.num_exec_queues);
+       err = drm_gpuvm_prepare_objects(&vm->gpuvm, exec, 0);
        if (err)
                return err;
 
@@ -515,14 +551,19 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
        if (err)
                return err;
 
-       return drm_gpuvm_validate(&vm->gpuvm, exec);
+       /*
+        * Add validation and rebinding to the locking loop since both can
+        * cause evictions which may require blocing dma_resv locks.
+        * The fence reservation here is intended for the new preempt fences
+        * we attach at the end of the rebind work.
+        */
+       return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
 }
 
 static void preempt_rebind_work_func(struct work_struct *w)
 {
        struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
        struct drm_exec exec;
-       struct dma_fence *rebind_fence;
        unsigned int fence_count = 0;
        LIST_HEAD(preempt_fences);
        ktime_t end = 0;
@@ -568,18 +609,11 @@ retry:
        if (err)
                goto out_unlock;
 
-       rebind_fence = xe_vm_rebind(vm, true);
-       if (IS_ERR(rebind_fence)) {
-               err = PTR_ERR(rebind_fence);
+       err = xe_vm_rebind(vm, true);
+       if (err)
                goto out_unlock;
-       }
 
-       if (rebind_fence) {
-               dma_fence_wait(rebind_fence, false);
-               dma_fence_put(rebind_fence);
-       }
-
-       /* Wait on munmap style VM unbinds */
+       /* Wait on rebinds and munmap style VM unbinds */
        wait = dma_resv_wait_timeout(xe_vm_resv(vm),
                                     DMA_RESV_USAGE_KERNEL,
                                     false, MAX_SCHEDULE_TIMEOUT);
@@ -773,14 +807,14 @@ xe_vm_bind_vma(struct xe_vma *vma, struct xe_exec_queue *q,
               struct xe_sync_entry *syncs, u32 num_syncs,
               bool first_op, bool last_op);
 
-struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
 {
-       struct dma_fence *fence = NULL;
+       struct dma_fence *fence;
        struct xe_vma *vma, *next;
 
        lockdep_assert_held(&vm->lock);
        if (xe_vm_in_lr_mode(vm) && !rebind_worker)
-               return NULL;
+               return 0;
 
        xe_vm_assert_held(vm);
        list_for_each_entry_safe(vma, next, &vm->rebind_list,
@@ -788,17 +822,17 @@ struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
                xe_assert(vm->xe, vma->tile_present);
 
                list_del_init(&vma->combined_links.rebind);
-               dma_fence_put(fence);
                if (rebind_worker)
                        trace_xe_vma_rebind_worker(vma);
                else
                        trace_xe_vma_rebind_exec(vma);
                fence = xe_vm_bind_vma(vma, NULL, NULL, 0, false, false);
                if (IS_ERR(fence))
-                       return fence;
+                       return PTR_ERR(fence);
+               dma_fence_put(fence);
        }
 
-       return fence;
+       return 0;
 }
 
 static void xe_vma_free(struct xe_vma *vma)
@@ -1004,35 +1038,26 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
 }
 
 /**
- * xe_vm_prepare_vma() - drm_exec utility to lock a vma
+ * xe_vm_lock_vma() - drm_exec utility to lock a vma
  * @exec: The drm_exec object we're currently locking for.
  * @vma: The vma for witch we want to lock the vm resv and any attached
  * object's resv.
- * @num_shared: The number of dma-fence slots to pre-allocate in the
- * objects' reservation objects.
  *
  * Return: 0 on success, negative error code on error. In particular
  * may return -EDEADLK on WW transaction contention and -EINTR if
  * an interruptible wait is terminated by a signal.
  */
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
-                     unsigned int num_shared)
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
 {
        struct xe_vm *vm = xe_vma_vm(vma);
        struct xe_bo *bo = xe_vma_bo(vma);
        int err;
 
        XE_WARN_ON(!vm);
-       if (num_shared)
-               err = drm_exec_prepare_obj(exec, xe_vm_obj(vm), num_shared);
-       else
-               err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
-       if (!err && bo && !bo->vm) {
-               if (num_shared)
-                       err = drm_exec_prepare_obj(exec, &bo->ttm.base, num_shared);
-               else
-                       err = drm_exec_lock_obj(exec, &bo->ttm.base);
-       }
+
+       err = drm_exec_lock_obj(exec, xe_vm_obj(vm));
+       if (!err && bo && !bo->vm)
+               err = drm_exec_lock_obj(exec, &bo->ttm.base);
 
        return err;
 }
@@ -1044,7 +1069,7 @@ static void xe_vma_destroy_unlocked(struct xe_vma *vma)
 
        drm_exec_init(&exec, 0, 0);
        drm_exec_until_all_locked(&exec) {
-               err = xe_vm_prepare_vma(&exec, vma, 0);
+               err = xe_vm_lock_vma(&exec, vma);
                drm_exec_retry_on_contention(&exec);
                if (XE_WARN_ON(err))
                        break;
@@ -1589,7 +1614,6 @@ static void vm_destroy_work_func(struct work_struct *w)
                XE_WARN_ON(vm->pt_root[id]);
 
        trace_xe_vm_free(vm);
-       dma_fence_put(vm->rebind_fence);
        kfree(vm);
 }
 
@@ -2512,7 +2536,7 @@ static int op_execute(struct drm_exec *exec, struct xe_vm *vm,
 
        lockdep_assert_held_write(&vm->lock);
 
-       err = xe_vm_prepare_vma(exec, vma, 1);
+       err = xe_vm_lock_vma(exec, vma);
        if (err)
                return err;
 
index 6df1f1c7f85d98a2b948ba41ec9f1ed5a287faf0..306cd0934a190ba0d5580787522c59e762b3b163 100644 (file)
@@ -207,7 +207,7 @@ int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
 
 int xe_vm_userptr_check_repin(struct xe_vm *vm);
 
-struct dma_fence *xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
+int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
 
 int xe_vm_invalidate_vma(struct xe_vma *vma);
 
@@ -242,8 +242,10 @@ bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
 
 int xe_analyze_vm(struct drm_printer *p, struct xe_vm *vm, int gt_id);
 
-int xe_vm_prepare_vma(struct drm_exec *exec, struct xe_vma *vma,
-                     unsigned int num_shared);
+int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
+
+int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
+                         unsigned int num_fences);
 
 /**
  * xe_vm_resv() - Return's the vm's reservation object
index ae5fb565f6bf48d52e29c811a8333793e4e128fd..badf3945083d56723cc477b3074929a4db316753 100644 (file)
@@ -177,9 +177,6 @@ struct xe_vm {
         */
        struct list_head rebind_list;
 
-       /** @rebind_fence: rebind fence from execbuf */
-       struct dma_fence *rebind_fence;
-
        /**
         * @destroy_work: worker to destroy VM, needed as a dma_fence signaling
         * from an irq context can be last put and the destroy needs to be able
@@ -264,6 +261,11 @@ struct xe_vm {
                bool capture_once;
        } error_capture;
 
+       /**
+        * @tlb_flush_seqno: Required TLB flush seqno for the next exec.
+        * protected by the vm resv.
+        */
+       u64 tlb_flush_seqno;
        /** @batch_invalidate_tlb: Always invalidate TLB before batch start */
        bool batch_invalidate_tlb;
        /** @xef: XE file handle for tracking this VM's drm client */
index 97a00ec9a4d48944a8233b49c5fa0106493abb47..caacdc0a3819458fbb47faae23432f5950fe5869 100644 (file)
@@ -209,7 +209,7 @@ static void block2mtd_free_device(struct block2mtd_dev *dev)
 
        if (dev->bdev_file) {
                invalidate_mapping_pages(dev->bdev_file->f_mapping, 0, -1);
-               fput(dev->bdev_file);
+               bdev_fput(dev->bdev_file);
        }
 
        kfree(dev);
index 9ed1821184ece57f2baa7e21003e91dc79d977b3..c95787cb908673c6ab1b236e9c447952e5e8e452 100644 (file)
@@ -5503,8 +5503,12 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
                .family = MV88E6XXX_FAMILY_6250,
                .name = "Marvell 88E6020",
                .num_databases = 64,
-               .num_ports = 4,
+               /* Ports 2-4 are not routed to pins
+                * => usable ports 0, 1, 5, 6
+                */
+               .num_ports = 7,
                .num_internal_phys = 2,
+               .invalid_port_mask = BIT(2) | BIT(3) | BIT(4),
                .max_vid = 4095,
                .port_base_addr = 0x8,
                .phy_base_addr = 0x0,
index 833e55e4b961296901d08def7fd2ff98caa7afb3..52ddb4ef259e93b9ddd64663f67b8f06b042fadf 100644 (file)
@@ -94,7 +94,7 @@ int sja1110_pcs_mdio_read_c45(struct mii_bus *bus, int phy, int mmd, int reg)
        return tmp & 0xffff;
 }
 
-int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int reg, int mmd,
+int sja1110_pcs_mdio_write_c45(struct mii_bus *bus, int phy, int mmd, int reg,
                               u16 val)
 {
        struct sja1105_mdio_private *mdio_priv = bus->priv;
index 7396e2823e3285a353d9483b795cdb51cbca58d0..b1f84b37032a7833d7e4f3d045e8755ace6f79d3 100644 (file)
@@ -3280,7 +3280,7 @@ static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv,
 }
 
 /* Returns a reusable dma control register value */
-static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv)
+static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv, bool flush_rx)
 {
        unsigned int i;
        u32 reg;
@@ -3305,6 +3305,14 @@ static u32 bcmgenet_dma_disable(struct bcmgenet_priv *priv)
        udelay(10);
        bcmgenet_umac_writel(priv, 0, UMAC_TX_FLUSH);
 
+       if (flush_rx) {
+               reg = bcmgenet_rbuf_ctrl_get(priv);
+               bcmgenet_rbuf_ctrl_set(priv, reg | BIT(0));
+               udelay(10);
+               bcmgenet_rbuf_ctrl_set(priv, reg);
+               udelay(10);
+       }
+
        return dma_ctrl;
 }
 
@@ -3368,8 +3376,8 @@ static int bcmgenet_open(struct net_device *dev)
 
        bcmgenet_set_hw_addr(priv, dev->dev_addr);
 
-       /* Disable RX/TX DMA and flush TX queues */
-       dma_ctrl = bcmgenet_dma_disable(priv);
+       /* Disable RX/TX DMA and flush TX and RX queues */
+       dma_ctrl = bcmgenet_dma_disable(priv, true);
 
        /* Reinitialize TDMA and RDMA and SW housekeeping */
        ret = bcmgenet_init_dma(priv);
@@ -4235,7 +4243,7 @@ static int bcmgenet_resume(struct device *d)
                        bcmgenet_hfb_create_rxnfc_filter(priv, rule);
 
        /* Disable RX/TX DMA and flush TX queues */
-       dma_ctrl = bcmgenet_dma_disable(priv);
+       dma_ctrl = bcmgenet_dma_disable(priv, false);
 
        /* Reinitialize TDMA and RDMA and SW housekeeping */
        ret = bcmgenet_init_dma(priv);
index d7693fdf640d53fa64a9801f76414a39094adb2c..8bd213da8fb6f5b0f5f4b06c4f7e3877d7cc5a71 100644 (file)
@@ -2454,8 +2454,6 @@ static int fec_enet_mii_probe(struct net_device *ndev)
        fep->link = 0;
        fep->full_duplex = 0;
 
-       phy_dev->mac_managed_pm = true;
-
        phy_attached_info(phy_dev);
 
        return 0;
@@ -2467,10 +2465,12 @@ static int fec_enet_mii_init(struct platform_device *pdev)
        struct net_device *ndev = platform_get_drvdata(pdev);
        struct fec_enet_private *fep = netdev_priv(ndev);
        bool suppress_preamble = false;
+       struct phy_device *phydev;
        struct device_node *node;
        int err = -ENXIO;
        u32 mii_speed, holdtime;
        u32 bus_freq;
+       int addr;
 
        /*
         * The i.MX28 dual fec interfaces are not equal.
@@ -2584,6 +2584,13 @@ static int fec_enet_mii_init(struct platform_device *pdev)
                goto err_out_free_mdiobus;
        of_node_put(node);
 
+       /* find all the PHY devices on the bus and set mac_managed_pm to true */
+       for (addr = 0; addr < PHY_MAX_ADDR; addr++) {
+               phydev = mdiobus_get_phy(fep->mii_bus, addr);
+               if (phydev)
+                       phydev->mac_managed_pm = true;
+       }
+
        mii_cnt++;
 
        /* save fec0 mii_bus */
index 1fef6bb5a5fbc8603c2ca079e7b18c4221032189..4b6e7536170abc584a7dc95c6c80b926d7c17375 100644 (file)
@@ -628,6 +628,7 @@ struct e1000_phy_info {
        u32 id;
        u32 reset_delay_us;     /* in usec */
        u32 revision;
+       u32 retry_count;
 
        enum e1000_media_type media_type;
 
@@ -644,6 +645,7 @@ struct e1000_phy_info {
        bool polarity_correction;
        bool speed_downgraded;
        bool autoneg_wait_to_complete;
+       bool retry_enabled;
 };
 
 struct e1000_nvm_info {
index 19e450a5bd314ff67676843a767ec1c5c2bd84d4..f9e94be36e97f20b5aeeccf43dfe580bf7c01847 100644 (file)
@@ -222,11 +222,18 @@ out:
        if (hw->mac.type >= e1000_pch_lpt) {
                /* Only unforce SMBus if ME is not active */
                if (!(er32(FWSM) & E1000_ICH_FWSM_FW_VALID)) {
+                       /* Switching PHY interface always returns MDI error
+                        * so disable retry mechanism to avoid wasting time
+                        */
+                       e1000e_disable_phy_retry(hw);
+
                        /* Unforce SMBus mode in PHY */
                        e1e_rphy_locked(hw, CV_SMB_CTRL, &phy_reg);
                        phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
                        e1e_wphy_locked(hw, CV_SMB_CTRL, phy_reg);
 
+                       e1000e_enable_phy_retry(hw);
+
                        /* Unforce SMBus mode in MAC */
                        mac_reg = er32(CTRL_EXT);
                        mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
@@ -310,6 +317,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
                goto out;
        }
 
+       /* There is no guarantee that the PHY is accessible at this time
+        * so disable retry mechanism to avoid wasting time
+        */
+       e1000e_disable_phy_retry(hw);
+
        /* The MAC-PHY interconnect may be in SMBus mode.  If the PHY is
         * inaccessible and resetting the PHY is not blocked, toggle the
         * LANPHYPC Value bit to force the interconnect to PCIe mode.
@@ -380,6 +392,8 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
                break;
        }
 
+       e1000e_enable_phy_retry(hw);
+
        hw->phy.ops.release(hw);
        if (!ret_val) {
 
@@ -449,6 +463,11 @@ static s32 e1000_init_phy_params_pchlan(struct e1000_hw *hw)
 
        phy->id = e1000_phy_unknown;
 
+       if (hw->mac.type == e1000_pch_mtp) {
+               phy->retry_count = 2;
+               e1000e_enable_phy_retry(hw);
+       }
+
        ret_val = e1000_init_phy_workarounds_pchlan(hw);
        if (ret_val)
                return ret_val;
@@ -1146,18 +1165,6 @@ s32 e1000_enable_ulp_lpt_lp(struct e1000_hw *hw, bool to_sx)
        if (ret_val)
                goto out;
 
-       /* Force SMBus mode in PHY */
-       ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
-       if (ret_val)
-               goto release;
-       phy_reg |= CV_SMB_CTRL_FORCE_SMBUS;
-       e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
-
-       /* Force SMBus mode in MAC */
-       mac_reg = er32(CTRL_EXT);
-       mac_reg |= E1000_CTRL_EXT_FORCE_SMBUS;
-       ew32(CTRL_EXT, mac_reg);
-
        /* Si workaround for ULP entry flow on i127/rev6 h/w.  Enable
         * LPLU and disable Gig speed when entering ULP
         */
@@ -1313,6 +1320,11 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
                /* Toggle LANPHYPC Value bit */
                e1000_toggle_lanphypc_pch_lpt(hw);
 
+       /* Switching PHY interface always returns MDI error
+        * so disable retry mechanism to avoid wasting time
+        */
+       e1000e_disable_phy_retry(hw);
+
        /* Unforce SMBus mode in PHY */
        ret_val = e1000_read_phy_reg_hv_locked(hw, CV_SMB_CTRL, &phy_reg);
        if (ret_val) {
@@ -1333,6 +1345,8 @@ static s32 e1000_disable_ulp_lpt_lp(struct e1000_hw *hw, bool force)
        phy_reg &= ~CV_SMB_CTRL_FORCE_SMBUS;
        e1000_write_phy_reg_hv_locked(hw, CV_SMB_CTRL, phy_reg);
 
+       e1000e_enable_phy_retry(hw);
+
        /* Unforce SMBus mode in MAC */
        mac_reg = er32(CTRL_EXT);
        mac_reg &= ~E1000_CTRL_EXT_FORCE_SMBUS;
index cc8c531ec3dff3e46ae6e9e83b82b1e311886e35..3692fce201959f3e663db219dd2db00023045ae1 100644 (file)
@@ -6623,6 +6623,7 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl, ctrl_ext, rctl, status, wufc;
        int retval = 0;
+       u16 smb_ctrl;
 
        /* Runtime suspend should only enable wakeup for link changes */
        if (runtime)
@@ -6696,6 +6697,23 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool runtime)
                        if (retval)
                                return retval;
                }
+
+               /* Force SMBUS to allow WOL */
+               /* Switching PHY interface always returns MDI error
+                * so disable retry mechanism to avoid wasting time
+                */
+               e1000e_disable_phy_retry(hw);
+
+               e1e_rphy(hw, CV_SMB_CTRL, &smb_ctrl);
+               smb_ctrl |= CV_SMB_CTRL_FORCE_SMBUS;
+               e1e_wphy(hw, CV_SMB_CTRL, smb_ctrl);
+
+               e1000e_enable_phy_retry(hw);
+
+               /* Force SMBus mode in MAC */
+               ctrl_ext = er32(CTRL_EXT);
+               ctrl_ext |= E1000_CTRL_EXT_FORCE_SMBUS;
+               ew32(CTRL_EXT, ctrl_ext);
        }
 
        /* Ensure that the appropriate bits are set in LPI_CTRL
index 5e329156d1bae0880b3a44a8952b484c080db18e..93544f1cc2a51be0c84c33391211bf47d2675edb 100644 (file)
@@ -107,6 +107,16 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
        return e1e_wphy(hw, M88E1000_PHY_GEN_CONTROL, 0);
 }
 
+void e1000e_disable_phy_retry(struct e1000_hw *hw)
+{
+       hw->phy.retry_enabled = false;
+}
+
+void e1000e_enable_phy_retry(struct e1000_hw *hw)
+{
+       hw->phy.retry_enabled = true;
+}
+
 /**
  *  e1000e_read_phy_reg_mdic - Read MDI control register
  *  @hw: pointer to the HW structure
@@ -118,55 +128,73 @@ s32 e1000e_phy_reset_dsp(struct e1000_hw *hw)
  **/
 s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
 {
+       u32 i, mdic = 0, retry_counter, retry_max;
        struct e1000_phy_info *phy = &hw->phy;
-       u32 i, mdic = 0;
+       bool success;
 
        if (offset > MAX_PHY_REG_ADDRESS) {
                e_dbg("PHY Address %d is out of range\n", offset);
                return -E1000_ERR_PARAM;
        }
 
+       retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
        /* Set up Op-code, Phy Address, and register offset in the MDI
         * Control register.  The MAC will take care of interfacing with the
         * PHY to retrieve the desired data.
         */
-       mdic = ((offset << E1000_MDIC_REG_SHIFT) |
-               (phy->addr << E1000_MDIC_PHY_SHIFT) |
-               (E1000_MDIC_OP_READ));
+       for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+               success = true;
 
-       ew32(MDIC, mdic);
+               mdic = ((offset << E1000_MDIC_REG_SHIFT) |
+                       (phy->addr << E1000_MDIC_PHY_SHIFT) |
+                       (E1000_MDIC_OP_READ));
 
-       /* Poll the ready bit to see if the MDI read completed
-        * Increasing the time out as testing showed failures with
-        * the lower time out
-        */
-       for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
-               udelay(50);
-               mdic = er32(MDIC);
-               if (mdic & E1000_MDIC_READY)
-                       break;
-       }
-       if (!(mdic & E1000_MDIC_READY)) {
-               e_dbg("MDI Read PHY Reg Address %d did not complete\n", offset);
-               return -E1000_ERR_PHY;
-       }
-       if (mdic & E1000_MDIC_ERROR) {
-               e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
-               return -E1000_ERR_PHY;
-       }
-       if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
-               e_dbg("MDI Read offset error - requested %d, returned %d\n",
-                     offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
-               return -E1000_ERR_PHY;
+               ew32(MDIC, mdic);
+
+               /* Poll the ready bit to see if the MDI read completed
+                * Increasing the time out as testing showed failures with
+                * the lower time out
+                */
+               for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+                       usleep_range(50, 60);
+                       mdic = er32(MDIC);
+                       if (mdic & E1000_MDIC_READY)
+                               break;
+               }
+               if (!(mdic & E1000_MDIC_READY)) {
+                       e_dbg("MDI Read PHY Reg Address %d did not complete\n",
+                             offset);
+                       success = false;
+               }
+               if (mdic & E1000_MDIC_ERROR) {
+                       e_dbg("MDI Read PHY Reg Address %d Error\n", offset);
+                       success = false;
+               }
+               if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+                       e_dbg("MDI Read offset error - requested %d, returned %d\n",
+                             offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+                       success = false;
+               }
+
+               /* Allow some time after each MDIC transaction to avoid
+                * reading duplicate data in the next MDIC transaction.
+                */
+               if (hw->mac.type == e1000_pch2lan)
+                       usleep_range(100, 150);
+
+               if (success) {
+                       *data = (u16)mdic;
+                       return 0;
+               }
+
+               if (retry_counter != retry_max) {
+                       e_dbg("Perform retry on PHY transaction...\n");
+                       mdelay(10);
+               }
        }
-       *data = (u16)mdic;
 
-       /* Allow some time after each MDIC transaction to avoid
-        * reading duplicate data in the next MDIC transaction.
-        */
-       if (hw->mac.type == e1000_pch2lan)
-               udelay(100);
-       return 0;
+       return -E1000_ERR_PHY;
 }
 
 /**
@@ -179,56 +207,72 @@ s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data)
  **/
 s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data)
 {
+       u32 i, mdic = 0, retry_counter, retry_max;
        struct e1000_phy_info *phy = &hw->phy;
-       u32 i, mdic = 0;
+       bool success;
 
        if (offset > MAX_PHY_REG_ADDRESS) {
                e_dbg("PHY Address %d is out of range\n", offset);
                return -E1000_ERR_PARAM;
        }
 
+       retry_max = phy->retry_enabled ? phy->retry_count : 0;
+
        /* Set up Op-code, Phy Address, and register offset in the MDI
         * Control register.  The MAC will take care of interfacing with the
         * PHY to retrieve the desired data.
         */
-       mdic = (((u32)data) |
-               (offset << E1000_MDIC_REG_SHIFT) |
-               (phy->addr << E1000_MDIC_PHY_SHIFT) |
-               (E1000_MDIC_OP_WRITE));
+       for (retry_counter = 0; retry_counter <= retry_max; retry_counter++) {
+               success = true;
 
-       ew32(MDIC, mdic);
+               mdic = (((u32)data) |
+                       (offset << E1000_MDIC_REG_SHIFT) |
+                       (phy->addr << E1000_MDIC_PHY_SHIFT) |
+                       (E1000_MDIC_OP_WRITE));
 
-       /* Poll the ready bit to see if the MDI read completed
-        * Increasing the time out as testing showed failures with
-        * the lower time out
-        */
-       for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
-               udelay(50);
-               mdic = er32(MDIC);
-               if (mdic & E1000_MDIC_READY)
-                       break;
-       }
-       if (!(mdic & E1000_MDIC_READY)) {
-               e_dbg("MDI Write PHY Reg Address %d did not complete\n", offset);
-               return -E1000_ERR_PHY;
-       }
-       if (mdic & E1000_MDIC_ERROR) {
-               e_dbg("MDI Write PHY Red Address %d Error\n", offset);
-               return -E1000_ERR_PHY;
-       }
-       if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
-               e_dbg("MDI Write offset error - requested %d, returned %d\n",
-                     offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
-               return -E1000_ERR_PHY;
-       }
+               ew32(MDIC, mdic);
 
-       /* Allow some time after each MDIC transaction to avoid
-        * reading duplicate data in the next MDIC transaction.
-        */
-       if (hw->mac.type == e1000_pch2lan)
-               udelay(100);
+               /* Poll the ready bit to see if the MDI read completed
+                * Increasing the time out as testing showed failures with
+                * the lower time out
+                */
+               for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) {
+                       usleep_range(50, 60);
+                       mdic = er32(MDIC);
+                       if (mdic & E1000_MDIC_READY)
+                               break;
+               }
+               if (!(mdic & E1000_MDIC_READY)) {
+                       e_dbg("MDI Write PHY Reg Address %d did not complete\n",
+                             offset);
+                       success = false;
+               }
+               if (mdic & E1000_MDIC_ERROR) {
+                       e_dbg("MDI Write PHY Reg Address %d Error\n", offset);
+                       success = false;
+               }
+               if (FIELD_GET(E1000_MDIC_REG_MASK, mdic) != offset) {
+                       e_dbg("MDI Write offset error - requested %d, returned %d\n",
+                             offset, FIELD_GET(E1000_MDIC_REG_MASK, mdic));
+                       success = false;
+               }
 
-       return 0;
+               /* Allow some time after each MDIC transaction to avoid
+                * reading duplicate data in the next MDIC transaction.
+                */
+               if (hw->mac.type == e1000_pch2lan)
+                       usleep_range(100, 150);
+
+               if (success)
+                       return 0;
+
+               if (retry_counter != retry_max) {
+                       e_dbg("Perform retry on PHY transaction...\n");
+                       mdelay(10);
+               }
+       }
+
+       return -E1000_ERR_PHY;
 }
 
 /**
index c48777d09523529c0977f1e19510aeb83d245f44..049bb325b4b14f15c674cff482e9bec68642e9e1 100644 (file)
@@ -51,6 +51,8 @@ s32 e1000e_read_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 *data);
 s32 e1000e_write_phy_reg_bm2(struct e1000_hw *hw, u32 offset, u16 data);
 void e1000_power_up_phy_copper(struct e1000_hw *hw);
 void e1000_power_down_phy_copper(struct e1000_hw *hw);
+void e1000e_disable_phy_retry(struct e1000_hw *hw);
+void e1000e_enable_phy_retry(struct e1000_hw *hw);
 s32 e1000e_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data);
 s32 e1000e_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data);
 s32 e1000_read_phy_reg_hv(struct e1000_hw *hw, u32 offset, u16 *data);
index ba24f3fa92c3719e1ac52d1a58457aa9a5773766..2fbabcdb5bb5f39264d7368f45ca4240902ebbdf 100644 (file)
@@ -955,6 +955,7 @@ struct i40e_q_vector {
        struct rcu_head rcu;    /* to avoid race with update stats on free */
        char name[I40E_INT_NAME_STR_LEN];
        bool arm_wb_state;
+       bool in_busy_poll;
        int irq_num;            /* IRQ assigned to this q_vector */
 } ____cacheline_internodealigned_in_smp;
 
index f86578857e8aee4136287816933d23125947bc3c..48b9ddb2b1b38b385527f137124ce86a36ac036d 100644 (file)
@@ -1253,8 +1253,11 @@ int i40e_count_filters(struct i40e_vsi *vsi)
        int bkt;
        int cnt = 0;
 
-       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
-               ++cnt;
+       hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+               if (f->state == I40E_FILTER_NEW ||
+                   f->state == I40E_FILTER_ACTIVE)
+                       ++cnt;
+       }
 
        return cnt;
 }
@@ -3911,6 +3914,12 @@ static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
                     q_vector->tx.target_itr >> 1);
                q_vector->tx.current_itr = q_vector->tx.target_itr;
 
+               /* Set ITR for software interrupts triggered after exiting
+                * busy-loop polling.
+                */
+               wr32(hw, I40E_PFINT_ITRN(I40E_SW_ITR, vector - 1),
+                    I40E_ITR_20K);
+
                wr32(hw, I40E_PFINT_RATEN(vector - 1),
                     i40e_intrl_usec_to_reg(vsi->int_rate_limit));
 
index 14ab642cafdb26f77e98c4c5e7962088103e2cad..432afbb6420137af3a6f13b75bb396f05a967362 100644 (file)
 #define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
 #define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
 #define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK I40E_MASK(0xFFF, I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)
 #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
 #define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK I40E_MASK(0x1, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK I40E_MASK(0x3, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
 #define I40E_PFINT_ICR0 0x00038780 /* Reset: CORER */
 #define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
 #define I40E_PFINT_ICR0_INTEVENT_MASK I40E_MASK(0x1, I40E_PFINT_ICR0_INTEVENT_SHIFT)
index 0d7177083708f29d3b4deba11d00abdcb017f886..1a12b732818eeff5b1fe20887c94ddc245a9fc59 100644 (file)
@@ -2630,7 +2630,22 @@ process_next:
        return failure ? budget : (int)total_rx_packets;
 }
 
-static inline u32 i40e_buildreg_itr(const int type, u16 itr)
+/**
+ * i40e_buildreg_itr - build a value for writing to I40E_PFINT_DYN_CTLN register
+ * @itr_idx: interrupt throttling index
+ * @interval: interrupt throttling interval value in usecs
+ * @force_swint: force software interrupt
+ *
+ * The function builds a value for I40E_PFINT_DYN_CTLN register that
+ * is used to update interrupt throttling interval for specified ITR index
+ * and optionally enforces a software interrupt. If the @itr_idx is equal
+ * to I40E_ITR_NONE then no interval change is applied and only @force_swint
+ * parameter is taken into account. If the interval change and enforced
+ * software interrupt are not requested then the built value just enables
+ * appropriate vector interrupt.
+ **/
+static u32 i40e_buildreg_itr(enum i40e_dyn_idx itr_idx, u16 interval,
+                            bool force_swint)
 {
        u32 val;
 
@@ -2644,23 +2659,33 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
         * an event in the PBA anyway so we need to rely on the automask
         * to hold pending events for us until the interrupt is re-enabled
         *
-        * The itr value is reported in microseconds, and the register
-        * value is recorded in 2 microsecond units. For this reason we
-        * only need to shift by the interval shift - 1 instead of the
-        * full value.
+        * We have to shift the given value as it is reported in microseconds
+        * and the register value is recorded in 2 microsecond units.
         */
-       itr &= I40E_ITR_MASK;
+       interval >>= 1;
 
+       /* 1. Enable vector interrupt
+        * 2. Update the interval for the specified ITR index
+        *    (I40E_ITR_NONE in the register is used to indicate that
+        *     no interval update is requested)
+        */
        val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
-             (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
-             (itr << (I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT - 1));
+             FIELD_PREP(I40E_PFINT_DYN_CTLN_ITR_INDX_MASK, itr_idx) |
+             FIELD_PREP(I40E_PFINT_DYN_CTLN_INTERVAL_MASK, interval);
+
+       /* 3. Enforce software interrupt trigger if requested
+        *    (These software interrupts rate is limited by ITR2 that is
+        *     set to 20K interrupts per second)
+        */
+       if (force_swint)
+               val |= I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+                      I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK |
+                      FIELD_PREP(I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK,
+                                 I40E_SW_ITR);
 
        return val;
 }
 
-/* a small macro to shorten up some long lines */
-#define INTREG I40E_PFINT_DYN_CTLN
-
 /* The act of updating the ITR will cause it to immediately trigger. In order
  * to prevent this from throwing off adaptive update statistics we defer the
  * update so that it can only happen so often. So after either Tx or Rx are
@@ -2679,8 +2704,10 @@ static inline u32 i40e_buildreg_itr(const int type, u16 itr)
 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
                                          struct i40e_q_vector *q_vector)
 {
+       enum i40e_dyn_idx itr_idx = I40E_ITR_NONE;
        struct i40e_hw *hw = &vsi->back->hw;
-       u32 intval;
+       u16 interval = 0;
+       u32 itr_val;
 
        /* If we don't have MSIX, then we only need to re-enable icr0 */
        if (!test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags)) {
@@ -2702,8 +2729,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
         */
        if (q_vector->rx.target_itr < q_vector->rx.current_itr) {
                /* Rx ITR needs to be reduced, this is highest priority */
-               intval = i40e_buildreg_itr(I40E_RX_ITR,
-                                          q_vector->rx.target_itr);
+               itr_idx = I40E_RX_ITR;
+               interval = q_vector->rx.target_itr;
                q_vector->rx.current_itr = q_vector->rx.target_itr;
                q_vector->itr_countdown = ITR_COUNTDOWN_START;
        } else if ((q_vector->tx.target_itr < q_vector->tx.current_itr) ||
@@ -2712,25 +2739,36 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
                /* Tx ITR needs to be reduced, this is second priority
                 * Tx ITR needs to be increased more than Rx, fourth priority
                 */
-               intval = i40e_buildreg_itr(I40E_TX_ITR,
-                                          q_vector->tx.target_itr);
+               itr_idx = I40E_TX_ITR;
+               interval = q_vector->tx.target_itr;
                q_vector->tx.current_itr = q_vector->tx.target_itr;
                q_vector->itr_countdown = ITR_COUNTDOWN_START;
        } else if (q_vector->rx.current_itr != q_vector->rx.target_itr) {
                /* Rx ITR needs to be increased, third priority */
-               intval = i40e_buildreg_itr(I40E_RX_ITR,
-                                          q_vector->rx.target_itr);
+               itr_idx = I40E_RX_ITR;
+               interval = q_vector->rx.target_itr;
                q_vector->rx.current_itr = q_vector->rx.target_itr;
                q_vector->itr_countdown = ITR_COUNTDOWN_START;
        } else {
                /* No ITR update, lowest priority */
-               intval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
                if (q_vector->itr_countdown)
                        q_vector->itr_countdown--;
        }
 
-       if (!test_bit(__I40E_VSI_DOWN, vsi->state))
-               wr32(hw, INTREG(q_vector->reg_idx), intval);
+       /* Do not update interrupt control register if VSI is down */
+       if (test_bit(__I40E_VSI_DOWN, vsi->state))
+               return;
+
+       /* Update ITR interval if necessary and enforce software interrupt
+        * if we are exiting busy poll.
+        */
+       if (q_vector->in_busy_poll) {
+               itr_val = i40e_buildreg_itr(itr_idx, interval, true);
+               q_vector->in_busy_poll = false;
+       } else {
+               itr_val = i40e_buildreg_itr(itr_idx, interval, false);
+       }
+       wr32(hw, I40E_PFINT_DYN_CTLN(q_vector->reg_idx), itr_val);
 }
 
 /**
@@ -2845,6 +2883,8 @@ tx_only:
         */
        if (likely(napi_complete_done(napi, work_done)))
                i40e_update_enable_itr(vsi, q_vector);
+       else
+               q_vector->in_busy_poll = true;
 
        return min(work_done, budget - 1);
 }
index abf15067eb5de4080e09780547763fee8cb32ab6..2cdc7de6301c131a3086d750237dc2c02488acac 100644 (file)
@@ -68,6 +68,7 @@ enum i40e_dyn_idx {
 /* these are indexes into ITRN registers */
 #define I40E_RX_ITR    I40E_IDX_ITR0
 #define I40E_TX_ITR    I40E_IDX_ITR1
+#define I40E_SW_ITR    I40E_IDX_ITR2
 
 /* Supported RSS offloads */
 #define I40E_DEFAULT_RSS_HENA ( \
index 83a34e98bdc79d077ac2fab52c50712b2404a914..232b65b9c8eacd364795b20a7271c05abe83311f 100644 (file)
@@ -1624,8 +1624,8 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
 {
        struct i40e_hw *hw = &pf->hw;
        struct i40e_vf *vf;
-       int i, v;
        u32 reg;
+       int i;
 
        /* If we don't have any VFs, then there is nothing to reset */
        if (!pf->num_alloc_vfs)
@@ -1636,11 +1636,10 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
                return false;
 
        /* Begin reset on all VFs at once */
-       for (v = 0; v < pf->num_alloc_vfs; v++) {
-               vf = &pf->vf[v];
+       for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
                /* If VF is being reset no need to trigger reset again */
                if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
-                       i40e_trigger_vf_reset(&pf->vf[v], flr);
+                       i40e_trigger_vf_reset(vf, flr);
        }
 
        /* HW requires some time to make sure it can flush the FIFO for a VF
@@ -1649,14 +1648,13 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
         * the VFs using a simple iterator that increments once that VF has
         * finished resetting.
         */
-       for (i = 0, v = 0; i < 10 && v < pf->num_alloc_vfs; i++) {
+       for (i = 0, vf = &pf->vf[0]; i < 10 && vf < &pf->vf[pf->num_alloc_vfs]; ++i) {
                usleep_range(10000, 20000);
 
                /* Check each VF in sequence, beginning with the VF to fail
                 * the previous check.
                 */
-               while (v < pf->num_alloc_vfs) {
-                       vf = &pf->vf[v];
+               while (vf < &pf->vf[pf->num_alloc_vfs]) {
                        if (!test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states)) {
                                reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
                                if (!(reg & I40E_VPGEN_VFRSTAT_VFRD_MASK))
@@ -1666,7 +1664,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
                        /* If the current VF has finished resetting, move on
                         * to the next VF in sequence.
                         */
-                       v++;
+                       ++vf;
                }
        }
 
@@ -1676,39 +1674,39 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
        /* Display a warning if at least one VF didn't manage to reset in
         * time, but continue on with the operation.
         */
-       if (v < pf->num_alloc_vfs)
+       if (vf < &pf->vf[pf->num_alloc_vfs])
                dev_err(&pf->pdev->dev, "VF reset check timeout on VF %d\n",
-                       pf->vf[v].vf_id);
+                       vf->vf_id);
        usleep_range(10000, 20000);
 
        /* Begin disabling all the rings associated with VFs, but do not wait
         * between each VF.
         */
-       for (v = 0; v < pf->num_alloc_vfs; v++) {
+       for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
                /* On initial reset, we don't have any queues to disable */
-               if (pf->vf[v].lan_vsi_idx == 0)
+               if (vf->lan_vsi_idx == 0)
                        continue;
 
                /* If VF is reset in another thread just continue */
                if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
                        continue;
 
-               i40e_vsi_stop_rings_no_wait(pf->vsi[pf->vf[v].lan_vsi_idx]);
+               i40e_vsi_stop_rings_no_wait(pf->vsi[vf->lan_vsi_idx]);
        }
 
        /* Now that we've notified HW to disable all of the VF rings, wait
         * until they finish.
         */
-       for (v = 0; v < pf->num_alloc_vfs; v++) {
+       for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
                /* On initial reset, we don't have any queues to disable */
-               if (pf->vf[v].lan_vsi_idx == 0)
+               if (vf->lan_vsi_idx == 0)
                        continue;
 
                /* If VF is reset in another thread just continue */
                if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
                        continue;
 
-               i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[v].lan_vsi_idx]);
+               i40e_vsi_wait_queues_disabled(pf->vsi[vf->lan_vsi_idx]);
        }
 
        /* Hw may need up to 50ms to finish disabling the RX queues. We
@@ -1717,12 +1715,12 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr)
        mdelay(50);
 
        /* Finish the reset on each VF */
-       for (v = 0; v < pf->num_alloc_vfs; v++) {
+       for (vf = &pf->vf[0]; vf < &pf->vf[pf->num_alloc_vfs]; ++vf) {
                /* If VF is reset in another thread just continue */
                if (test_bit(I40E_VF_STATE_RESETTING, &vf->vf_states))
                        continue;
 
-               i40e_cleanup_reset_vf(&pf->vf[v]);
+               i40e_cleanup_reset_vf(vf);
        }
 
        i40e_flush(hw);
@@ -3139,11 +3137,12 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg)
                /* Allow to delete VF primary MAC only if it was not set
                 * administratively by PF or if VF is trusted.
                 */
-               if (ether_addr_equal(addr, vf->default_lan_addr.addr) &&
-                   i40e_can_vf_change_mac(vf))
-                       was_unimac_deleted = true;
-               else
-                       continue;
+               if (ether_addr_equal(addr, vf->default_lan_addr.addr)) {
+                       if (i40e_can_vf_change_mac(vf))
+                               was_unimac_deleted = true;
+                       else
+                               continue;
+               }
 
                if (i40e_del_mac_filter(vsi, al->list[i].addr)) {
                        ret = -EINVAL;
index db4b2844e1f71820494ffe30cb67d87e8af91edc..d9f6cc71d900aa4e91ebe2ca5878a3ea467a89a2 100644 (file)
@@ -1002,8 +1002,8 @@ static void ice_get_itr_intrl_gran(struct ice_hw *hw)
  */
 int ice_init_hw(struct ice_hw *hw)
 {
-       struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
-       void *mac_buf __free(kfree);
+       struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
+       void *mac_buf __free(kfree) = NULL;
        u16 mac_buf_len;
        int status;
 
@@ -3272,7 +3272,7 @@ int ice_update_link_info(struct ice_port_info *pi)
                return status;
 
        if (li->link_info & ICE_AQ_MEDIA_AVAILABLE) {
-               struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+               struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
 
                pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
                if (!pcaps)
@@ -3420,7 +3420,7 @@ ice_cfg_phy_fc(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
 int
 ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update)
 {
-       struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+       struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
        struct ice_aqc_set_phy_cfg_data cfg = { 0 };
        struct ice_hw *hw;
        int status;
@@ -3561,7 +3561,7 @@ int
 ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg,
                enum ice_fec_mode fec)
 {
-       struct ice_aqc_get_phy_caps_data *pcaps __free(kfree);
+       struct ice_aqc_get_phy_caps_data *pcaps __free(kfree) = NULL;
        struct ice_hw *hw;
        int status;
 
index 255a9c8151b451713e31a88ecbf9f57c10e2b411..78b833b3e1d7ef05fcb1b91ede0d526acc19d7fd 100644 (file)
@@ -941,11 +941,11 @@ static u64 ice_loopback_test(struct net_device *netdev)
        struct ice_netdev_priv *np = netdev_priv(netdev);
        struct ice_vsi *orig_vsi = np->vsi, *test_vsi;
        struct ice_pf *pf = orig_vsi->back;
+       u8 *tx_frame __free(kfree) = NULL;
        u8 broadcast[ETH_ALEN], ret = 0;
        int num_frames, valid_frames;
        struct ice_tx_ring *tx_ring;
        struct ice_rx_ring *rx_ring;
-       u8 *tx_frame __free(kfree);
        int i;
 
        netdev_info(netdev, "loopback test\n");
index 80dc4bcdd3a41cd0baa0e3e29f0fddb29053341f..b3e1bdcb80f84d0d7295f86fcd56ad167f687559 100644 (file)
@@ -26,24 +26,22 @@ static void ice_port_vlan_on(struct ice_vsi *vsi)
        struct ice_vsi_vlan_ops *vlan_ops;
        struct ice_pf *pf = vsi->back;
 
-       if (ice_is_dvm_ena(&pf->hw)) {
-               vlan_ops = &vsi->outer_vlan_ops;
-
-               /* setup outer VLAN ops */
-               vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
-               vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+       /* setup inner VLAN ops */
+       vlan_ops = &vsi->inner_vlan_ops;
 
-               /* setup inner VLAN ops */
-               vlan_ops = &vsi->inner_vlan_ops;
+       if (ice_is_dvm_ena(&pf->hw)) {
                vlan_ops->add_vlan = noop_vlan_arg;
                vlan_ops->del_vlan = noop_vlan_arg;
                vlan_ops->ena_stripping = ice_vsi_ena_inner_stripping;
                vlan_ops->dis_stripping = ice_vsi_dis_inner_stripping;
                vlan_ops->ena_insertion = ice_vsi_ena_inner_insertion;
                vlan_ops->dis_insertion = ice_vsi_dis_inner_insertion;
-       } else {
-               vlan_ops = &vsi->inner_vlan_ops;
 
+               /* setup outer VLAN ops */
+               vlan_ops = &vsi->outer_vlan_ops;
+               vlan_ops->set_port_vlan = ice_vsi_set_outer_port_vlan;
+               vlan_ops->clear_port_vlan = ice_vsi_clear_outer_port_vlan;
+       } else {
                vlan_ops->set_port_vlan = ice_vsi_set_inner_port_vlan;
                vlan_ops->clear_port_vlan = ice_vsi_clear_inner_port_vlan;
        }
index 6dd7a66bb8979ab16a21b5a4243c03cc2957ff29..f5bc4a2780745e2dafce8455167362d6e3f2ce29 100644 (file)
@@ -2941,6 +2941,8 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
        rx_ptype = le16_get_bits(rx_desc->ptype_err_fflags0,
                                 VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M);
 
+       skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
+
        decoded = rxq->vport->rx_ptype_lkup[rx_ptype];
        /* If we don't know the ptype we can't do anything else with it. Just
         * pass it up the stack as-is.
@@ -2951,8 +2953,6 @@ static int idpf_rx_process_skb_fields(struct idpf_queue *rxq,
        /* process RSS/hash */
        idpf_rx_hash(rxq, skb, rx_desc, &decoded);
 
-       skb->protocol = eth_type_trans(skb, rxq->vport->netdev);
-
        if (le16_get_bits(rx_desc->hdrlen_flags,
                          VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M))
                return idpf_rx_rsc(rxq, skb, rx_desc, &decoded);
index 72e060cf6b6181d9ccd22b2f3a126c6b7f063a83..e9bf9231b0185db5113f99455ad50d31f8e432d5 100644 (file)
@@ -160,6 +160,8 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu)
                        continue;
                lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu));
                for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) {
+                       if (iter >= MAX_LMAC_COUNT)
+                               continue;
                        lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu),
                                              iter);
                        rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac);
index e350242bbafbadabf9d027a77a4a30dbbb234589..be709f83f3318cd8766ca6a2e6b51b88fa8a30e3 100644 (file)
@@ -1657,7 +1657,7 @@ static int npc_fwdb_detect_load_prfl_img(struct rvu *rvu, uint64_t prfl_sz,
        struct npc_coalesced_kpu_prfl *img_data = NULL;
        int i = 0, rc = -EINVAL;
        void __iomem *kpu_prfl_addr;
-       u16 offset;
+       u32 offset;
 
        img_data = (struct npc_coalesced_kpu_prfl __force *)rvu->kpu_prfl_addr;
        if (le64_to_cpu(img_data->signature) == KPU_SIGN &&
index b40bd0e467514848fee22f1e32f86cb4c9d129c3..3f46d5e0fb2ecbe184f75381ad589ec9c12da44a 100644 (file)
@@ -1933,7 +1933,7 @@ int otx2_open(struct net_device *netdev)
         * mcam entries are enabled to receive the packets. Hence disable the
         * packet I/O.
         */
-       if (err == EIO)
+       if (err == -EIO)
                goto err_disable_rxtx;
        else if (err)
                goto err_tx_stop_queues;
index 77134ca929382f7ef7a989c8e636a126349f742c..ba303868686a770dffa897b6707fefcfd9814b1b 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
+#include <linux/rtnetlink.h>
 #include <linux/skbuff.h>
 
 #include "mlxbf_gige.h"
@@ -492,8 +493,13 @@ static void mlxbf_gige_shutdown(struct platform_device *pdev)
 {
        struct mlxbf_gige *priv = platform_get_drvdata(pdev);
 
-       writeq(0, priv->base + MLXBF_GIGE_INT_EN);
-       mlxbf_gige_clean_port(priv);
+       rtnl_lock();
+       netif_device_detach(priv->netdev);
+
+       if (netif_running(priv->netdev))
+               dev_close(priv->netdev);
+
+       rtnl_unlock();
 }
 
 static const struct acpi_device_id __maybe_unused mlxbf_gige_acpi_match[] = {
index 59287c6e6cee6fe06632fdeef40775d11c86f4b4..d8af5e7e15b4d82ebf20c610cea7ab2ec19625a8 100644 (file)
@@ -601,7 +601,7 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
 
        *alloc_size = mtu + MANA_RXBUF_PAD + *headroom;
 
-       *datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN);
+       *datasize = mtu + ETH_HLEN;
 }
 
 static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
index 5c879a5c86d70bf866c3edc1d2e646356b9239d0..6f1e6f386b7ba7277bc211765b8ef49d9fcd6750 100644 (file)
@@ -1314,17 +1314,40 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
        RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
 }
 
+static void rtl_dash_loop_wait(struct rtl8169_private *tp,
+                              const struct rtl_cond *c,
+                              unsigned long usecs, int n, bool high)
+{
+       if (!tp->dash_enabled)
+               return;
+       rtl_loop_wait(tp, c, usecs, n, high);
+}
+
+static void rtl_dash_loop_wait_high(struct rtl8169_private *tp,
+                                   const struct rtl_cond *c,
+                                   unsigned long d, int n)
+{
+       rtl_dash_loop_wait(tp, c, d, n, true);
+}
+
+static void rtl_dash_loop_wait_low(struct rtl8169_private *tp,
+                                  const struct rtl_cond *c,
+                                  unsigned long d, int n)
+{
+       rtl_dash_loop_wait(tp, c, d, n, false);
+}
+
 static void rtl8168dp_driver_start(struct rtl8169_private *tp)
 {
        r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
-       rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+       rtl_dash_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
 }
 
 static void rtl8168ep_driver_start(struct rtl8169_private *tp)
 {
        r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
        r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
-       rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
+       rtl_dash_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 30);
 }
 
 static void rtl8168_driver_start(struct rtl8169_private *tp)
@@ -1338,7 +1361,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp)
 static void rtl8168dp_driver_stop(struct rtl8169_private *tp)
 {
        r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP);
-       rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
+       rtl_dash_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
 }
 
 static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
@@ -1346,7 +1369,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
        rtl8168ep_stop_cmac(tp);
        r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
        r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
-       rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
+       rtl_dash_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
 }
 
 static void rtl8168_driver_stop(struct rtl8169_private *tp)
@@ -5141,6 +5164,15 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
        struct mii_bus *new_bus;
        int ret;
 
+       /* On some boards with this chip version the BIOS is buggy and misses
+        * to reset the PHY page selector. This results in the PHY ID read
+        * accessing registers on a different page, returning a more or
+        * less random value. Fix this by resetting the page selector first.
+        */
+       if (tp->mac_version == RTL_GIGA_MAC_VER_25 ||
+           tp->mac_version == RTL_GIGA_MAC_VER_26)
+               r8169_mdio_write(tp, 0x1f, 0);
+
        new_bus = devm_mdiobus_alloc(&pdev->dev);
        if (!new_bus)
                return -ENOMEM;
index d1be030c88483ae217a84d9047d883dbf6cfad54..ba01c8cc3c906d5ea9a02029dc76fabc243b277c 100644 (file)
@@ -1324,12 +1324,12 @@ static int ravb_poll(struct napi_struct *napi, int budget)
        int q = napi - priv->napi;
        int mask = BIT(q);
        int quota = budget;
+       bool unmask;
 
        /* Processing RX Descriptor Ring */
        /* Clear RX interrupt */
        ravb_write(ndev, ~(mask | RIS0_RESERVED), RIS0);
-       if (ravb_rx(ndev, &quota, q))
-               goto out;
+       unmask = !ravb_rx(ndev, &quota, q);
 
        /* Processing TX Descriptor Ring */
        spin_lock_irqsave(&priv->lock, flags);
@@ -1339,6 +1339,18 @@ static int ravb_poll(struct napi_struct *napi, int budget)
        netif_wake_subqueue(ndev, q);
        spin_unlock_irqrestore(&priv->lock, flags);
 
+       /* Receive error message handling */
+       priv->rx_over_errors = priv->stats[RAVB_BE].rx_over_errors;
+       if (info->nc_queues)
+               priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
+       if (priv->rx_over_errors != ndev->stats.rx_over_errors)
+               ndev->stats.rx_over_errors = priv->rx_over_errors;
+       if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
+               ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
+
+       if (!unmask)
+               goto out;
+
        napi_complete(napi);
 
        /* Re-enable RX/TX interrupts */
@@ -1352,14 +1364,6 @@ static int ravb_poll(struct napi_struct *napi, int budget)
        }
        spin_unlock_irqrestore(&priv->lock, flags);
 
-       /* Receive error message handling */
-       priv->rx_over_errors =  priv->stats[RAVB_BE].rx_over_errors;
-       if (info->nc_queues)
-               priv->rx_over_errors += priv->stats[RAVB_NC].rx_over_errors;
-       if (priv->rx_over_errors != ndev->stats.rx_over_errors)
-               ndev->stats.rx_over_errors = priv->rx_over_errors;
-       if (priv->rx_fifo_errors != ndev->stats.rx_fifo_errors)
-               ndev->stats.rx_fifo_errors = priv->rx_fifo_errors;
 out:
        return budget - quota;
 }
index 6b6d0de0961975133d9990a5c296c916ecd00491..cef25efbdff99fdc07a313ab678869e83c85f79e 100644 (file)
@@ -92,19 +92,41 @@ static void dwmac4_rx_queue_priority(struct mac_device_info *hw,
                                     u32 prio, u32 queue)
 {
        void __iomem *ioaddr = hw->pcsr;
-       u32 base_register;
-       u32 value;
+       u32 clear_mask = 0;
+       u32 ctrl2, ctrl3;
+       int i;
 
-       base_register = (queue < 4) ? GMAC_RXQ_CTRL2 : GMAC_RXQ_CTRL3;
-       if (queue >= 4)
-               queue -= 4;
+       ctrl2 = readl(ioaddr + GMAC_RXQ_CTRL2);
+       ctrl3 = readl(ioaddr + GMAC_RXQ_CTRL3);
 
-       value = readl(ioaddr + base_register);
+       /* The software must ensure that the same priority
+        * is not mapped to multiple Rx queues
+        */
+       for (i = 0; i < 4; i++)
+               clear_mask |= ((prio << GMAC_RXQCTRL_PSRQX_SHIFT(i)) &
+                                               GMAC_RXQCTRL_PSRQX_MASK(i));
+
+       ctrl2 &= ~clear_mask;
+       ctrl3 &= ~clear_mask;
+
+       /* First assign new priorities to a queue, then
+        * clear them from others queues
+        */
+       if (queue < 4) {
+               ctrl2 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+                                               GMAC_RXQCTRL_PSRQX_MASK(queue);
 
-       value &= ~GMAC_RXQCTRL_PSRQX_MASK(queue);
-       value |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
+               writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+               writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+       } else {
+               queue -= 4;
+
+               ctrl3 |= (prio << GMAC_RXQCTRL_PSRQX_SHIFT(queue)) &
                                                GMAC_RXQCTRL_PSRQX_MASK(queue);
-       writel(value, ioaddr + base_register);
+
+               writel(ctrl3, ioaddr + GMAC_RXQ_CTRL3);
+               writel(ctrl2, ioaddr + GMAC_RXQ_CTRL2);
+       }
 }
 
 static void dwmac4_tx_queue_priority(struct mac_device_info *hw,
index 1af2f89a0504ab4c7ad6042e52f5898ba064df6c..e841e312077ef0604c5b17a5473069cc4affadff 100644 (file)
@@ -105,17 +105,41 @@ static void dwxgmac2_rx_queue_prio(struct mac_device_info *hw, u32 prio,
                                   u32 queue)
 {
        void __iomem *ioaddr = hw->pcsr;
-       u32 value, reg;
+       u32 clear_mask = 0;
+       u32 ctrl2, ctrl3;
+       int i;
 
-       reg = (queue < 4) ? XGMAC_RXQ_CTRL2 : XGMAC_RXQ_CTRL3;
-       if (queue >= 4)
+       ctrl2 = readl(ioaddr + XGMAC_RXQ_CTRL2);
+       ctrl3 = readl(ioaddr + XGMAC_RXQ_CTRL3);
+
+       /* The software must ensure that the same priority
+        * is not mapped to multiple Rx queues
+        */
+       for (i = 0; i < 4; i++)
+               clear_mask |= ((prio << XGMAC_PSRQ_SHIFT(i)) &
+                                               XGMAC_PSRQ(i));
+
+       ctrl2 &= ~clear_mask;
+       ctrl3 &= ~clear_mask;
+
+       /* First assign new priorities to a queue, then
+        * clear them from others queues
+        */
+       if (queue < 4) {
+               ctrl2 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+                                               XGMAC_PSRQ(queue);
+
+               writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+               writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+       } else {
                queue -= 4;
 
-       value = readl(ioaddr + reg);
-       value &= ~XGMAC_PSRQ(queue);
-       value |= (prio << XGMAC_PSRQ_SHIFT(queue)) & XGMAC_PSRQ(queue);
+               ctrl3 |= (prio << XGMAC_PSRQ_SHIFT(queue)) &
+                                               XGMAC_PSRQ(queue);
 
-       writel(value, ioaddr + reg);
+               writel(ctrl3, ioaddr + XGMAC_RXQ_CTRL3);
+               writel(ctrl2, ioaddr + XGMAC_RXQ_CTRL2);
+       }
 }
 
 static void dwxgmac2_tx_queue_prio(struct mac_device_info *hw, u32 prio,
index 5b5d5e4310d127189cfdea18223f02460a6af149..2fa511227eac8490314c09821c7b6f1e1fdfed43 100644 (file)
@@ -20,6 +20,8 @@
 #include "txgbe_phy.h"
 #include "txgbe_hw.h"
 
+#define TXGBE_I2C_CLK_DEV_NAME "i2c_dw"
+
 static int txgbe_swnodes_register(struct txgbe *txgbe)
 {
        struct txgbe_nodes *nodes = &txgbe->nodes;
@@ -571,8 +573,8 @@ static int txgbe_clock_register(struct txgbe *txgbe)
        char clk_name[32];
        struct clk *clk;
 
-       snprintf(clk_name, sizeof(clk_name), "i2c_dw.%d",
-                pci_dev_id(pdev));
+       snprintf(clk_name, sizeof(clk_name), "%s.%d",
+                TXGBE_I2C_CLK_DEV_NAME, pci_dev_id(pdev));
 
        clk = clk_register_fixed_rate(NULL, clk_name, NULL, 0, 156250000);
        if (IS_ERR(clk))
@@ -634,7 +636,7 @@ static int txgbe_i2c_register(struct txgbe *txgbe)
 
        info.parent = &pdev->dev;
        info.fwnode = software_node_fwnode(txgbe->nodes.group[SWNODE_I2C]);
-       info.name = "i2c_designware";
+       info.name = TXGBE_I2C_CLK_DEV_NAME;
        info.id = pci_dev_id(pdev);
 
        info.res = &DEFINE_RES_IRQ(pdev->irq);
index 8b8634600c51903215665f22e1f0b8bec2728bc1..ddb50a0e2bc822c75d1c0126465c6026144291b7 100644 (file)
@@ -2431,6 +2431,7 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
        struct lan8814_ptp_rx_ts *rx_ts, *tmp;
        int txcfg = 0, rxcfg = 0;
        int pkt_ts_enable;
+       int tx_mod;
 
        ptp_priv->hwts_tx_type = config->tx_type;
        ptp_priv->rx_filter = config->rx_filter;
@@ -2477,9 +2478,14 @@ static int lan8814_hwtstamp(struct mii_timestamper *mii_ts,
        lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_RX_TIMESTAMP_EN, pkt_ts_enable);
        lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_TIMESTAMP_EN, pkt_ts_enable);
 
-       if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC)
+       tx_mod = lanphy_read_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD);
+       if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ONESTEP_SYNC) {
                lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
-                                     PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+                                     tx_mod | PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+       } else if (ptp_priv->hwts_tx_type == HWTSTAMP_TX_ON) {
+               lanphy_write_page_reg(ptp_priv->phydev, 5, PTP_TX_MOD,
+                                     tx_mod & ~PTP_TX_MOD_TX_PTP_SYNC_TS_INSERT_);
+       }
 
        if (config->rx_filter != HWTSTAMP_FILTER_NONE)
                lan8814_config_ts_intr(ptp_priv->phydev, true);
@@ -2537,7 +2543,7 @@ static void lan8814_txtstamp(struct mii_timestamper *mii_ts,
        }
 }
 
-static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
 {
        struct ptp_header *ptp_header;
        u32 type;
@@ -2547,7 +2553,11 @@ static void lan8814_get_sig_rx(struct sk_buff *skb, u16 *sig)
        ptp_header = ptp_parse_header(skb, type);
        skb_pull_inline(skb, ETH_HLEN);
 
+       if (!ptp_header)
+               return false;
+
        *sig = (__force u16)(ntohs(ptp_header->sequence_id));
+       return true;
 }
 
 static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
@@ -2559,7 +2569,8 @@ static bool lan8814_match_rx_skb(struct kszphy_ptp_priv *ptp_priv,
        bool ret = false;
        u16 skb_sig;
 
-       lan8814_get_sig_rx(skb, &skb_sig);
+       if (!lan8814_get_sig_rx(skb, &skb_sig))
+               return ret;
 
        /* Iterate over all RX timestamps and match it with the received skbs */
        spin_lock_irqsave(&ptp_priv->rx_ts_lock, flags);
@@ -2834,7 +2845,7 @@ static int lan8814_ptpci_adjfine(struct ptp_clock_info *ptpci, long scaled_ppm)
        return 0;
 }
 
-static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
+static bool lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
 {
        struct ptp_header *ptp_header;
        u32 type;
@@ -2842,7 +2853,11 @@ static void lan8814_get_sig_tx(struct sk_buff *skb, u16 *sig)
        type = ptp_classify_raw(skb);
        ptp_header = ptp_parse_header(skb, type);
 
+       if (!ptp_header)
+               return false;
+
        *sig = (__force u16)(ntohs(ptp_header->sequence_id));
+       return true;
 }
 
 static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
@@ -2856,7 +2871,8 @@ static void lan8814_match_tx_skb(struct kszphy_ptp_priv *ptp_priv,
 
        spin_lock_irqsave(&ptp_priv->tx_queue.lock, flags);
        skb_queue_walk_safe(&ptp_priv->tx_queue, skb, skb_tmp) {
-               lan8814_get_sig_tx(skb, &skb_sig);
+               if (!lan8814_get_sig_tx(skb, &skb_sig))
+                       continue;
 
                if (memcmp(&skb_sig, &seq_id, sizeof(seq_id)))
                        continue;
@@ -2910,7 +2926,8 @@ static bool lan8814_match_skb(struct kszphy_ptp_priv *ptp_priv,
 
        spin_lock_irqsave(&ptp_priv->rx_queue.lock, flags);
        skb_queue_walk_safe(&ptp_priv->rx_queue, skb, skb_tmp) {
-               lan8814_get_sig_rx(skb, &skb_sig);
+               if (!lan8814_get_sig_rx(skb, &skb_sig))
+                       continue;
 
                if (memcmp(&skb_sig, &rx_ts->seq_id, sizeof(rx_ts->seq_id)))
                        continue;
index 88e084534853dd50505fd730e7ccd07c70f2d8ee..a9c418890a1cacc584c9265f4716fa2e18fe0e4b 100644 (file)
@@ -1273,6 +1273,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev)
 
        if (is_valid_ether_addr(mac)) {
                eth_hw_addr_set(dev->net, mac);
+               if (!is_local_ether_addr(mac))
+                       dev->net->addr_assign_type = NET_ADDR_PERM;
        } else {
                netdev_info(dev->net, "invalid MAC address, using random\n");
                eth_hw_addr_random(dev->net);
index ad29f370034e4f080ca7350e8114093446f798e6..8d2aee88526c69cace53949a94d5f8040d32bbe7 100644 (file)
@@ -285,6 +285,7 @@ static struct sk_buff *xennet_alloc_one_rx_buffer(struct netfront_queue *queue)
                return NULL;
        }
        skb_add_rx_frag(skb, 0, page, 0, 0, PAGE_SIZE);
+       skb_mark_for_recycle(skb);
 
        /* Align ip header to a 16 bytes boundary */
        skb_reserve(skb, NET_IP_ALIGN);
index d70f793ce4b38dd7bfb20c0d44392b018ab784ea..403525cc17833c18271d555e1469814845ddd45d 100644 (file)
@@ -443,7 +443,7 @@ of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
        if (IS_ERR(pwm))
                return pwm;
 
-       if (args->args_count > 1)
+       if (args->args_count > 0)
                pwm->args.period = args->args[0];
 
        pwm->args.polarity = PWM_POLARITY_NORMAL;
index 1b17dc4c219cc94aae8bf030526298aca3e29eaa..e25e48d76aa79c843e6873fa2ee8bc1a830bc7f5 100644 (file)
@@ -606,7 +606,7 @@ static int allocate_actors_buffer(struct power_allocator_params *params,
 
        /* There might be no cooling devices yet. */
        if (!num_actors) {
-               ret = -EINVAL;
+               ret = 0;
                goto clean_state;
        }
 
@@ -679,11 +679,6 @@ static int power_allocator_bind(struct thermal_zone_device *tz)
                return -ENOMEM;
 
        get_governor_trips(tz, params);
-       if (!params->trip_max) {
-               dev_warn(&tz->device, "power_allocator: missing trip_max\n");
-               kfree(params);
-               return -EINVAL;
-       }
 
        ret = check_power_actors(tz, params);
        if (ret < 0) {
@@ -714,9 +709,10 @@ static int power_allocator_bind(struct thermal_zone_device *tz)
        else
                params->sustainable_power = tz->tzp->sustainable_power;
 
-       estimate_pid_constants(tz, tz->tzp->sustainable_power,
-                              params->trip_switch_on,
-                              params->trip_max->temperature);
+       if (params->trip_max)
+               estimate_pid_constants(tz, tz->tzp->sustainable_power,
+                                      params->trip_switch_on,
+                                      params->trip_max->temperature);
 
        reset_pid_controller(params);
 
index 9cdaa2faa5363333627e0cba54a4efe75b45b144..0f4f531c97800c648437fb2eb7409ccc2b198536 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1202,8 +1202,8 @@ static void aio_complete(struct aio_kiocb *iocb)
                spin_lock_irqsave(&ctx->wait.lock, flags);
                list_for_each_entry_safe(curr, next, &ctx->wait.head, w.entry)
                        if (avail >= curr->min_nr) {
-                               list_del_init_careful(&curr->w.entry);
                                wake_up_process(curr->w.private);
+                               list_del_init_careful(&curr->w.entry);
                        }
                spin_unlock_irqrestore(&ctx->wait.lock, flags);
        }
index b02796c8a595339a7127a4e96a90c4927ef85e60..66ca0bbee639492d6593655b7d9d061bc9125a4f 100644 (file)
@@ -17,6 +17,7 @@ bcachefs-y            :=      \
        btree_journal_iter.o    \
        btree_key_cache.o       \
        btree_locking.o         \
+       btree_node_scan.o       \
        btree_trans_commit.o    \
        btree_update.o          \
        btree_update_interior.o \
@@ -37,6 +38,7 @@ bcachefs-y            :=      \
        error.o                 \
        extents.o               \
        extent_update.o         \
+       eytzinger.o             \
        fs.o                    \
        fs-common.o             \
        fs-ioctl.o              \
@@ -67,6 +69,7 @@ bcachefs-y            :=      \
        quota.o                 \
        rebalance.o             \
        recovery.o              \
+       recovery_passes.o       \
        reflink.o               \
        replicas.o              \
        sb-clean.o              \
index 893e38f9db807f4c6d819a470339b6af97f85804..4ff56fa4d539201e648f28a9225e9d71a59907f0 100644 (file)
@@ -1713,34 +1713,37 @@ static int bch2_discard_one_bucket(struct btree_trans *trans,
        if (ret)
                goto out;
 
-       if (BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
-               a->v.gen++;
-               SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
-               goto write;
-       }
-
-       if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
-               if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
-                       bch2_trans_inconsistent(trans,
-                               "clearing need_discard but journal_seq %llu > flushed_seq %llu\n"
-                               "%s",
-                               a->v.journal_seq,
-                               c->journal.flushed_seq_ondisk,
-                               (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+       if (a->v.dirty_sectors) {
+               if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+                                              trans, "attempting to discard bucket with dirty data\n%s",
+                                              (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
                        ret = -EIO;
-               }
                goto out;
        }
 
        if (a->v.data_type != BCH_DATA_need_discard) {
-               if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info) {
-                       bch2_trans_inconsistent(trans,
-                               "bucket incorrectly set in need_discard btree\n"
-                               "%s",
-                               (bch2_bkey_val_to_text(&buf, c, k), buf.buf));
-                       ret = -EIO;
+               if (data_type_is_empty(a->v.data_type) &&
+                   BCH_ALLOC_V4_NEED_INC_GEN(&a->v)) {
+                       a->v.gen++;
+                       SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
+                       goto write;
                }
 
+               if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+                                              trans, "bucket incorrectly set in need_discard btree\n"
+                                              "%s",
+                                              (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+                       ret = -EIO;
+               goto out;
+       }
+
+       if (a->v.journal_seq > c->journal.flushed_seq_ondisk) {
+               if (bch2_trans_inconsistent_on(c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_info,
+                                              trans, "clearing need_discard but journal_seq %llu > flushed_seq %llu\n%s",
+                                              a->v.journal_seq,
+                                              c->journal.flushed_seq_ondisk,
+                                              (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+                       ret = -EIO;
                goto out;
        }
 
@@ -1835,6 +1838,7 @@ static int bch2_clear_bucket_needs_discard(struct btree_trans *trans, struct bpo
        if (ret)
                goto err;
 
+       BUG_ON(a->v.dirty_sectors);
        SET_BCH_ALLOC_V4_NEED_DISCARD(&a->v, false);
        a->v.data_type = alloc_data_type(a->v, a->v.data_type);
 
@@ -1942,6 +1946,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
                goto out;
 
        BUG_ON(a->v.data_type != BCH_DATA_cached);
+       BUG_ON(a->v.dirty_sectors);
 
        if (!a->v.cached_sectors)
                bch_err(c, "invalidating empty bucket, confused");
index 214b15c84d1f3258d2ba23effacc5ad4e8852783..a1fc30adf9129da53ab8a97586dee34fd04878d9 100644 (file)
@@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
 static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
 {
        switch (watermark) {
-       case BCH_WATERMARK_reclaim:
+       case BCH_WATERMARK_interior_updates:
                return 0;
+       case BCH_WATERMARK_reclaim:
+               return OPEN_BUCKETS_COUNT / 6;
        case BCH_WATERMARK_btree:
        case BCH_WATERMARK_btree_copygc:
                return OPEN_BUCKETS_COUNT / 4;
index b91b7a46105608d089828db3bd65d1cc359475af..c2226e947c41fbcd7e462a7baa49d8726de44ecc 100644 (file)
@@ -22,7 +22,8 @@ struct bucket_alloc_state {
        x(copygc)                       \
        x(btree)                        \
        x(btree_copygc)                 \
-       x(reclaim)
+       x(reclaim)                      \
+       x(interior_updates)
 
 enum bch_watermark {
 #define x(name)        BCH_WATERMARK_##name,
index 8cb35ea572cb95ce13a956b21a4ec3a40ac39cdb..114328acde7202ed201fc8e776ed9cd73176d765 100644 (file)
@@ -8,6 +8,7 @@
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "btree_write_buffer.h"
+#include "checksum.h"
 #include "error.h"
 
 #include <linux/mm.h>
@@ -29,8 +30,7 @@ static bool extent_matches_bp(struct bch_fs *c,
                if (p.ptr.cached)
                        continue;
 
-               bch2_extent_ptr_to_bp(c, btree_id, level, k, p,
-                                     &bucket2, &bp2);
+               bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2);
                if (bpos_eq(bucket, bucket2) &&
                    !memcmp(&bp, &bp2, sizeof(bp)))
                        return true;
@@ -44,6 +44,11 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
                             struct printbuf *err)
 {
        struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);
+
+       /* these will be caught by fsck */
+       if (!bch2_dev_exists2(c, bp.k->p.inode))
+               return 0;
+
        struct bpos bucket = bp_pos_to_bucket(c, bp.k->p);
        int ret = 0;
 
@@ -378,7 +383,7 @@ static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_
                        backpointer_to_missing_alloc,
                        "backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
                        alloc_iter.pos.inode, alloc_iter.pos.offset,
-                       (bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+                       (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
                ret = bch2_btree_delete_at(trans, bp_iter, 0);
                goto out;
        }
@@ -414,6 +419,84 @@ struct extents_to_bp_state {
        struct bkey_buf last_flushed;
 };
 
+static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
+                              struct bkey_s_c extent, unsigned dev)
+{
+       struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
+       int ret = PTR_ERR_OR_ZERO(n);
+       if (ret)
+               return ret;
+
+       bch2_bkey_drop_device(bkey_i_to_s(n), dev);
+       return bch2_btree_insert_trans(trans, btree, n, 0);
+}
+
+static int check_extent_checksum(struct btree_trans *trans,
+                                enum btree_id btree, struct bkey_s_c extent,
+                                enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
+{
+       struct bch_fs *c = trans->c;
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       struct printbuf buf = PRINTBUF;
+       void *data_buf = NULL;
+       struct bio *bio = NULL;
+       size_t bytes;
+       int ret = 0;
+
+       if (bkey_is_btree_ptr(extent.k))
+               return false;
+
+       bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
+               if (p.ptr.dev == dev)
+                       goto found;
+       BUG();
+found:
+       if (!p.crc.csum_type)
+               return false;
+
+       bytes = p.crc.compressed_size << 9;
+
+       struct bch_dev *ca = bch_dev_bkey_exists(c, dev);
+       if (!bch2_dev_get_ioref(ca, READ))
+               return false;
+
+       data_buf = kvmalloc(bytes, GFP_KERNEL);
+       if (!data_buf) {
+               ret = -ENOMEM;
+               goto err;
+       }
+
+       bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL);
+       bio->bi_iter.bi_sector = p.ptr.offset;
+       bch2_bio_map(bio, data_buf, bytes);
+       ret = submit_bio_wait(bio);
+       if (ret)
+               goto err;
+
+       prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
+       prt_printf(&buf, "\n  %s ", bch2_btree_id_str(btree));
+       bch2_bkey_val_to_text(&buf, c, extent);
+       prt_printf(&buf, "\n  %s ", bch2_btree_id_str(o_btree));
+       bch2_bkey_val_to_text(&buf, c, extent2);
+
+       struct nonce nonce = extent_nonce(extent.k->version, p.crc);
+       struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
+       if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
+                       c, dup_backpointer_to_bad_csum_extent,
+                       "%s", buf.buf))
+               ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
+fsck_err:
+err:
+       if (bio)
+               bio_put(bio);
+       kvfree(data_buf);
+       percpu_ref_put(&ca->io_ref);
+       printbuf_exit(&buf);
+       return ret;
+}
+
 static int check_bp_exists(struct btree_trans *trans,
                           struct extents_to_bp_state *s,
                           struct bpos bucket,
@@ -421,7 +504,8 @@ static int check_bp_exists(struct btree_trans *trans,
                           struct bkey_s_c orig_k)
 {
        struct bch_fs *c = trans->c;
-       struct btree_iter bp_iter = { NULL };
+       struct btree_iter bp_iter = {};
+       struct btree_iter other_extent_iter = {};
        struct printbuf buf = PRINTBUF;
        struct bkey_s_c bp_k;
        struct bkey_buf tmp;
@@ -429,13 +513,19 @@ static int check_bp_exists(struct btree_trans *trans,
 
        bch2_bkey_buf_init(&tmp);
 
+       if (!bch2_dev_bucket_exists(c, bucket)) {
+               prt_str(&buf, "extent for nonexistent device:bucket ");
+               bch2_bpos_to_text(&buf, bucket);
+               prt_str(&buf, "\n  ");
+               bch2_bkey_val_to_text(&buf, c, orig_k);
+               bch_err(c, "%s", buf.buf);
+               return -BCH_ERR_fsck_repair_unimplemented;
+       }
+
        if (bpos_lt(bucket, s->bucket_start) ||
            bpos_gt(bucket, s->bucket_end))
                return 0;
 
-       if (!bch2_dev_bucket_exists(c, bucket))
-               goto missing;
-
        bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
                                  bucket_pos_to_bp(c, bucket, bp.bucket_offset),
                                  0);
@@ -461,21 +551,94 @@ static int check_bp_exists(struct btree_trans *trans,
                        ret = -BCH_ERR_transaction_restart_write_buffer_flush;
                        goto out;
                }
-               goto missing;
+
+               goto check_existing_bp;
        }
 out:
 err:
 fsck_err:
+       bch2_trans_iter_exit(trans, &other_extent_iter);
        bch2_trans_iter_exit(trans, &bp_iter);
        bch2_bkey_buf_exit(&tmp, c);
        printbuf_exit(&buf);
        return ret;
+check_existing_bp:
+       /* Do we have a backpointer for a different extent? */
+       if (bp_k.k->type != KEY_TYPE_backpointer)
+               goto missing;
+
+       struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;
+
+       struct bkey_s_c other_extent =
+               bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
+       ret = bkey_err(other_extent);
+       if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+               ret = 0;
+       if (ret)
+               goto err;
+
+       if (!other_extent.k)
+               goto missing;
+
+       if (bch2_extents_match(orig_k, other_extent)) {
+               printbuf_reset(&buf);
+               prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n  ");
+               bch2_bkey_val_to_text(&buf, c, orig_k);
+               prt_str(&buf, "\n  ");
+               bch2_bkey_val_to_text(&buf, c, other_extent);
+               bch_err(c, "%s", buf.buf);
+
+               if (other_extent.k->size <= orig_k.k->size) {
+                       ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
+                       if (ret)
+                               goto err;
+                       goto out;
+               } else {
+                       ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
+                       if (ret)
+                               goto err;
+                       goto missing;
+               }
+       }
+
+       ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
+       if (ret < 0)
+               goto err;
+       if (ret) {
+               ret = 0;
+               goto missing;
+       }
+
+       ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
+       if (ret < 0)
+               goto err;
+       if (ret) {
+               ret = 0;
+               goto out;
+       }
+
+       printbuf_reset(&buf);
+       prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n  ", bucket.inode);
+       bch2_bkey_val_to_text(&buf, c, orig_k);
+       prt_str(&buf, "\n  ");
+       bch2_bkey_val_to_text(&buf, c, other_extent);
+       bch_err(c, "%s", buf.buf);
+       ret = -BCH_ERR_fsck_repair_unimplemented;
+       goto err;
 missing:
+       printbuf_reset(&buf);
        prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
               bch2_btree_id_str(bp.btree_id), bp.level);
        bch2_bkey_val_to_text(&buf, c, orig_k);
-       prt_printf(&buf, "\nbp pos ");
-       bch2_bpos_to_text(&buf, bp_iter.pos);
+       prt_printf(&buf, "\n  got:   ");
+       bch2_bkey_val_to_text(&buf, c, bp_k);
+
+       struct bkey_i_backpointer n_bp_k;
+       bkey_backpointer_init(&n_bp_k.k_i);
+       n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset);
+       n_bp_k.v = bp;
+       prt_printf(&buf, "\n  want:  ");
+       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));
 
        if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
                ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true);
@@ -502,8 +665,7 @@ static int check_extent_to_backpointers(struct btree_trans *trans,
                if (p.ptr.cached)
                        continue;
 
-               bch2_extent_ptr_to_bp(c, btree, level,
-                                     k, p, &bucket_pos, &bp);
+               bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp);
 
                ret = check_bp_exists(trans, s, bucket_pos, bp, k);
                if (ret)
index 327365a9feac4e8fa69575ec6fe6157fd3edb127..da012ca7daee5501fe04be48bc875c918abbb33a 100644 (file)
@@ -90,20 +90,40 @@ static inline int bch2_bucket_backpointer_mod(struct btree_trans *trans,
        return bch2_trans_update_buffered(trans, BTREE_ID_backpointers, &bp_k.k_i);
 }
 
-static inline enum bch_data_type bkey_ptr_data_type(enum btree_id btree_id, unsigned level,
-                                                   struct bkey_s_c k, struct extent_ptr_decoded p)
+static inline enum bch_data_type bch2_bkey_ptr_data_type(struct bkey_s_c k,
+                                                        struct extent_ptr_decoded p,
+                                                        const union bch_extent_entry *entry)
 {
-       return  level           ? BCH_DATA_btree :
-               p.has_ec        ? BCH_DATA_stripe :
-                                 BCH_DATA_user;
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr:
+       case KEY_TYPE_btree_ptr_v2:
+               return BCH_DATA_btree;
+       case KEY_TYPE_extent:
+       case KEY_TYPE_reflink_v:
+               return p.has_ec ? BCH_DATA_stripe : BCH_DATA_user;
+       case KEY_TYPE_stripe: {
+               const struct bch_extent_ptr *ptr = &entry->ptr;
+               struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+
+               BUG_ON(ptr < s.v->ptrs ||
+                      ptr >= s.v->ptrs + s.v->nr_blocks);
+
+               return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
+                       ? BCH_DATA_parity
+                       : BCH_DATA_user;
+       }
+       default:
+               BUG();
+       }
 }
 
 static inline void bch2_extent_ptr_to_bp(struct bch_fs *c,
                           enum btree_id btree_id, unsigned level,
                           struct bkey_s_c k, struct extent_ptr_decoded p,
+                          const union bch_extent_entry *entry,
                           struct bpos *bucket_pos, struct bch_backpointer *bp)
 {
-       enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+       enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
        s64 sectors = level ? btree_sectors(c) : k.k->size;
        u32 bucket_offset;
 
index 799aa32b6b4d990f913b0d5dfb98b6a47af1f0b2..a31a5f706929eb2006e4867a38123b9526639cee 100644 (file)
 #include "fifo.h"
 #include "nocow_locking_types.h"
 #include "opts.h"
-#include "recovery_types.h"
+#include "recovery_passes_types.h"
 #include "sb-errors_types.h"
 #include "seqmutex.h"
 #include "time_stats.h"
@@ -456,6 +456,7 @@ enum bch_time_stats {
 
 #include "alloc_types.h"
 #include "btree_types.h"
+#include "btree_node_scan_types.h"
 #include "btree_write_buffer_types.h"
 #include "buckets_types.h"
 #include "buckets_waiting_for_journal_types.h"
@@ -614,6 +615,7 @@ struct bch_dev {
  */
 
 #define BCH_FS_FLAGS()                 \
+       x(new_fs)                       \
        x(started)                      \
        x(may_go_rw)                    \
        x(rw)                           \
@@ -796,6 +798,7 @@ struct bch_fs {
                u64             features;
                u64             compat;
                unsigned long   errors_silent[BITS_TO_LONGS(BCH_SB_ERR_MAX)];
+               u64             btrees_lost_data;
        }                       sb;
 
 
@@ -810,7 +813,6 @@ struct bch_fs {
 
        /* snapshot.c: */
        struct snapshot_table __rcu *snapshots;
-       size_t                  snapshot_table_size;
        struct mutex            snapshot_table_lock;
        struct rw_semaphore     snapshot_create_lock;
 
@@ -1104,6 +1106,8 @@ struct bch_fs {
        struct journal_keys     journal_keys;
        struct list_head        journal_iters;
 
+       struct find_btree_nodes found_btree_nodes;
+
        u64                     last_bucket_seq_cleanup;
 
        u64                     counters_on_mount[BCH_COUNTER_NR];
index bff8750ac0d743aa22f2cbea9effbf77bf6be725..63102992d9556d1b33b445a3116df61964a6ca01 100644 (file)
@@ -818,6 +818,7 @@ struct bch_sb_field_ext {
        struct bch_sb_field     field;
        __le64                  recovery_passes_required[2];
        __le64                  errors_silent[8];
+       __le64                  btrees_lost_data;
 };
 
 struct bch_sb_field_downgrade_entry {
index 3fd1085b6c61ee72e7e814cf722306ebdba057c4..3bb477840eab6b1461f454c3c71b386589aa4afb 100644 (file)
@@ -134,18 +134,24 @@ void bch2_dump_btree_node_iter(struct btree *b,
        printbuf_exit(&buf);
 }
 
-#ifdef CONFIG_BCACHEFS_DEBUG
-
-void __bch2_verify_btree_nr_keys(struct btree *b)
+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *b)
 {
        struct bset_tree *t;
        struct bkey_packed *k;
-       struct btree_nr_keys nr = { 0 };
+       struct btree_nr_keys nr = {};
 
        for_each_bset(b, t)
                bset_tree_for_each_key(b, t, k)
                        if (!bkey_deleted(k))
                                btree_keys_account_key_add(&nr, t - b->set, k);
+       return nr;
+}
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+
+void __bch2_verify_btree_nr_keys(struct btree *b)
+{
+       struct btree_nr_keys nr = bch2_btree_node_count_keys(b);
 
        BUG_ON(memcmp(&nr, &b->nr, sizeof(nr)));
 }
index 79c77baaa383868c99660a78a656c73d187f996f..120a79fd456bd5ccb50c2608aac4940b63cfeed9 100644 (file)
@@ -458,6 +458,8 @@ struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *,
 
 /* Accounting: */
 
+struct btree_nr_keys bch2_btree_node_count_keys(struct btree *);
+
 static inline void btree_keys_account_key(struct btree_nr_keys *n,
                                          unsigned bset,
                                          struct bkey_packed *k,
index 562561a9a510e8ce55cdee26a9b064d4c07cf02d..84474324dba9b508141f0e886bafbd8a95d47537 100644 (file)
@@ -808,7 +808,8 @@ static noinline void btree_bad_header(struct bch_fs *c, struct btree *b)
        prt_printf(&buf, "\nmax ");
        bch2_bpos_to_text(&buf, b->data->max_key);
 
-       bch2_fs_inconsistent(c, "%s", buf.buf);
+       bch2_fs_topology_error(c, "%s", buf.buf);
+
        printbuf_exit(&buf);
 }
 
@@ -1134,6 +1135,8 @@ void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k)
        b = btree_cache_find(bc, k);
        if (!b)
                return;
+
+       BUG_ON(b == btree_node_root(trans->c, b));
 wait_on_io:
        /* not allowed to wait on io with btree locks held: */
 
index bdaed29f084a4d558a160817e6fa6447c8547b0d..6280da1244b55032beaf60c4e2b29df0ff2c3152 100644 (file)
@@ -7,11 +7,13 @@
 #include "bcachefs.h"
 #include "alloc_background.h"
 #include "alloc_foreground.h"
+#include "backpointers.h"
 #include "bkey_methods.h"
 #include "bkey_buf.h"
 #include "btree_journal_iter.h"
 #include "btree_key_cache.h"
 #include "btree_locking.h"
+#include "btree_node_scan.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
 #include "btree_gc.h"
@@ -24,7 +26,7 @@
 #include "journal.h"
 #include "keylist.h"
 #include "move.h"
-#include "recovery.h"
+#include "recovery_passes.h"
 #include "reflink.h"
 #include "replicas.h"
 #include "super-io.h"
@@ -40,6 +42,7 @@
 
 #define DROP_THIS_NODE         10
 #define DROP_PREV_NODE         11
+#define DID_FILL_FROM_SCAN     12
 
 static struct bkey_s unsafe_bkey_s_c_to_s(struct bkey_s_c k)
 {
@@ -70,90 +73,6 @@ static inline void gc_pos_set(struct bch_fs *c, struct gc_pos new_pos)
        __gc_pos_set(c, new_pos);
 }
 
-/*
- * Missing: if an interior btree node is empty, we need to do something -
- * perhaps just kill it
- */
-static int bch2_gc_check_topology(struct bch_fs *c,
-                                 struct btree *b,
-                                 struct bkey_buf *prev,
-                                 struct bkey_buf cur,
-                                 bool is_last)
-{
-       struct bpos node_start  = b->data->min_key;
-       struct bpos node_end    = b->data->max_key;
-       struct bpos expected_start = bkey_deleted(&prev->k->k)
-               ? node_start
-               : bpos_successor(prev->k->k.p);
-       struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
-       int ret = 0;
-
-       if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
-               struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(cur.k);
-
-               if (!bpos_eq(expected_start, bp->v.min_key)) {
-                       bch2_topology_error(c);
-
-                       if (bkey_deleted(&prev->k->k)) {
-                               prt_printf(&buf1, "start of node: ");
-                               bch2_bpos_to_text(&buf1, node_start);
-                       } else {
-                               bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(prev->k));
-                       }
-                       bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(cur.k));
-
-                       if (__fsck_err(c,
-                                      FSCK_CAN_FIX|
-                                      FSCK_CAN_IGNORE|
-                                      FSCK_NO_RATELIMIT,
-                                      btree_node_topology_bad_min_key,
-                                      "btree node with incorrect min_key at btree %s level %u:\n"
-                                      "  prev %s\n"
-                                      "  cur %s",
-                                      bch2_btree_id_str(b->c.btree_id), b->c.level,
-                                      buf1.buf, buf2.buf) && should_restart_for_topology_repair(c)) {
-                               bch_info(c, "Halting mark and sweep to start topology repair pass");
-                               ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
-                               goto err;
-                       } else {
-                               set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
-                       }
-               }
-       }
-
-       if (is_last && !bpos_eq(cur.k->k.p, node_end)) {
-               bch2_topology_error(c);
-
-               printbuf_reset(&buf1);
-               printbuf_reset(&buf2);
-
-               bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(cur.k));
-               bch2_bpos_to_text(&buf2, node_end);
-
-               if (__fsck_err(c, FSCK_CAN_FIX|FSCK_CAN_IGNORE|FSCK_NO_RATELIMIT,
-                         btree_node_topology_bad_max_key,
-                         "btree node with incorrect max_key at btree %s level %u:\n"
-                         "  %s\n"
-                         "  expected %s",
-                         bch2_btree_id_str(b->c.btree_id), b->c.level,
-                         buf1.buf, buf2.buf) &&
-                   should_restart_for_topology_repair(c)) {
-                       bch_info(c, "Halting mark and sweep to start topology repair pass");
-                       ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
-                       goto err;
-               } else {
-                       set_bit(BCH_FS_initial_gc_unfixed, &c->flags);
-               }
-       }
-
-       bch2_bkey_buf_copy(prev, c, cur.k);
-err:
-fsck_err:
-       printbuf_exit(&buf2);
-       printbuf_exit(&buf1);
-       return ret;
-}
-
 static void btree_ptr_to_v2(struct btree *b, struct bkey_i_btree_ptr_v2 *dst)
 {
        switch (b->key.k.type) {
@@ -212,6 +131,17 @@ static int set_node_min(struct bch_fs *c, struct btree *b, struct bpos new_min)
        struct bkey_i_btree_ptr_v2 *new;
        int ret;
 
+       if (c->opts.verbose) {
+               struct printbuf buf = PRINTBUF;
+
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+               prt_str(&buf, " -> ");
+               bch2_bpos_to_text(&buf, new_min);
+
+               bch_info(c, "%s(): %s", __func__, buf.buf);
+               printbuf_exit(&buf);
+       }
+
        new = kmalloc_array(BKEY_BTREE_PTR_U64s_MAX, sizeof(u64), GFP_KERNEL);
        if (!new)
                return -BCH_ERR_ENOMEM_gc_repair_key;
@@ -237,6 +167,17 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
        struct bkey_i_btree_ptr_v2 *new;
        int ret;
 
+       if (c->opts.verbose) {
+               struct printbuf buf = PRINTBUF;
+
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+               prt_str(&buf, " -> ");
+               bch2_bpos_to_text(&buf, new_max);
+
+               bch_info(c, "%s(): %s", __func__, buf.buf);
+               printbuf_exit(&buf);
+       }
+
        ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level + 1, b->key.k.p);
        if (ret)
                return ret;
@@ -268,127 +209,138 @@ static int set_node_max(struct bch_fs *c, struct btree *b, struct bpos new_max)
        return 0;
 }
 
-static int btree_repair_node_boundaries(struct bch_fs *c, struct btree *b,
-                                       struct btree *prev, struct btree *cur)
+static int btree_check_node_boundaries(struct bch_fs *c, struct btree *b,
+                                      struct btree *prev, struct btree *cur,
+                                      struct bpos *pulled_from_scan)
 {
        struct bpos expected_start = !prev
                ? b->data->min_key
                : bpos_successor(prev->key.k.p);
-       struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+       struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       if (!prev) {
-               prt_printf(&buf1, "start of node: ");
-               bch2_bpos_to_text(&buf1, b->data->min_key);
-       } else {
-               bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&prev->key));
+       BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+              !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+                       b->data->min_key));
+
+       if (bpos_eq(expected_start, cur->data->min_key))
+               return 0;
+
+       prt_printf(&buf, "  at btree %s level %u:\n  parent: ",
+                  bch2_btree_id_str(b->c.btree_id), b->c.level);
+       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+       if (prev) {
+               prt_printf(&buf, "\n  prev: ");
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&prev->key));
        }
 
-       bch2_bkey_val_to_text(&buf2, c, bkey_i_to_s_c(&cur->key));
-
-       if (prev &&
-           bpos_gt(expected_start, cur->data->min_key) &&
-           BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {
-               /* cur overwrites prev: */
-
-               if (mustfix_fsck_err_on(bpos_ge(prev->data->min_key,
-                                               cur->data->min_key), c,
-                               btree_node_topology_overwritten_by_next_node,
-                               "btree node overwritten by next node at btree %s level %u:\n"
-                               "  node %s\n"
-                               "  next %s",
-                               bch2_btree_id_str(b->c.btree_id), b->c.level,
-                               buf1.buf, buf2.buf)) {
-                       ret = DROP_PREV_NODE;
-                       goto out;
-               }
+       prt_str(&buf, "\n  next: ");
+       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&cur->key));
 
-               if (mustfix_fsck_err_on(!bpos_eq(prev->key.k.p,
-                                                bpos_predecessor(cur->data->min_key)), c,
-                               btree_node_topology_bad_max_key,
-                               "btree node with incorrect max_key at btree %s level %u:\n"
-                               "  node %s\n"
-                               "  next %s",
-                               bch2_btree_id_str(b->c.btree_id), b->c.level,
-                               buf1.buf, buf2.buf))
-                       ret = set_node_max(c, prev,
-                                          bpos_predecessor(cur->data->min_key));
-       } else {
-               /* prev overwrites cur: */
-
-               if (mustfix_fsck_err_on(bpos_ge(expected_start,
-                                               cur->data->max_key), c,
-                               btree_node_topology_overwritten_by_prev_node,
-                               "btree node overwritten by prev node at btree %s level %u:\n"
-                               "  prev %s\n"
-                               "  node %s",
-                               bch2_btree_id_str(b->c.btree_id), b->c.level,
-                               buf1.buf, buf2.buf)) {
-                       ret = DROP_THIS_NODE;
-                       goto out;
-               }
+       if (bpos_lt(expected_start, cur->data->min_key)) {                              /* gap */
+               if (b->c.level == 1 &&
+                   bpos_lt(*pulled_from_scan, cur->data->min_key)) {
+                       ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+                                                    expected_start,
+                                                    bpos_predecessor(cur->data->min_key));
+                       if (ret)
+                               goto err;
 
-               if (mustfix_fsck_err_on(!bpos_eq(expected_start, cur->data->min_key), c,
-                               btree_node_topology_bad_min_key,
-                               "btree node with incorrect min_key at btree %s level %u:\n"
-                               "  prev %s\n"
-                               "  node %s",
-                               bch2_btree_id_str(b->c.btree_id), b->c.level,
-                               buf1.buf, buf2.buf))
-                       ret = set_node_min(c, cur, expected_start);
+                       *pulled_from_scan = cur->data->min_key;
+                       ret = DID_FILL_FROM_SCAN;
+               } else {
+                       if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+                                            "btree node with incorrect min_key%s", buf.buf))
+                               ret = set_node_min(c, cur, expected_start);
+               }
+       } else {                                                                        /* overlap */
+               if (prev && BTREE_NODE_SEQ(cur->data) > BTREE_NODE_SEQ(prev->data)) {   /* cur overwrites prev */
+                       if (bpos_ge(prev->data->min_key, cur->data->min_key)) {         /* fully? */
+                               if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_next_node,
+                                                    "btree node overwritten by next node%s", buf.buf))
+                                       ret = DROP_PREV_NODE;
+                       } else {
+                               if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+                                                    "btree node with incorrect max_key%s", buf.buf))
+                                       ret = set_node_max(c, prev,
+                                                          bpos_predecessor(cur->data->min_key));
+                       }
+               } else {
+                       if (bpos_ge(expected_start, cur->data->max_key)) {              /* fully? */
+                               if (mustfix_fsck_err(c, btree_node_topology_overwritten_by_prev_node,
+                                                    "btree node overwritten by prev node%s", buf.buf))
+                                       ret = DROP_THIS_NODE;
+                       } else {
+                               if (mustfix_fsck_err(c, btree_node_topology_bad_min_key,
+                                                    "btree node with incorrect min_key%s", buf.buf))
+                                       ret = set_node_min(c, cur, expected_start);
+                       }
+               }
        }
-out:
+err:
 fsck_err:
-       printbuf_exit(&buf2);
-       printbuf_exit(&buf1);
+       printbuf_exit(&buf);
        return ret;
 }
 
 static int btree_repair_node_end(struct bch_fs *c, struct btree *b,
-                                struct btree *child)
+                                struct btree *child, struct bpos *pulled_from_scan)
 {
-       struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+       struct printbuf buf = PRINTBUF;
        int ret = 0;
 
-       bch2_bkey_val_to_text(&buf1, c, bkey_i_to_s_c(&child->key));
-       bch2_bpos_to_text(&buf2, b->key.k.p);
+       if (bpos_eq(child->key.k.p, b->key.k.p))
+               return 0;
 
-       if (mustfix_fsck_err_on(!bpos_eq(child->key.k.p, b->key.k.p), c,
-                               btree_node_topology_bad_max_key,
-                       "btree node with incorrect max_key at btree %s level %u:\n"
-                       "  %s\n"
-                       "  expected %s",
-                       bch2_btree_id_str(b->c.btree_id), b->c.level,
-                       buf1.buf, buf2.buf)) {
-               ret = set_node_max(c, child, b->key.k.p);
-               if (ret)
-                       goto err;
+       prt_printf(&buf, "at btree %s level %u:\n  parent: ",
+                  bch2_btree_id_str(b->c.btree_id), b->c.level);
+       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+       prt_str(&buf, "\n  child: ");
+       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&child->key));
+
+       if (mustfix_fsck_err(c, btree_node_topology_bad_max_key,
+                            "btree node with incorrect max_key%s", buf.buf)) {
+               if (b->c.level == 1 &&
+                   bpos_lt(*pulled_from_scan, b->key.k.p)) {
+                       ret = bch2_get_scanned_nodes(c, b->c.btree_id, 0,
+                                               bpos_successor(child->key.k.p), b->key.k.p);
+                       if (ret)
+                               goto err;
+
+                       *pulled_from_scan = b->key.k.p;
+                       ret = DID_FILL_FROM_SCAN;
+               } else {
+                       ret = set_node_max(c, child, b->key.k.p);
+               }
        }
 err:
 fsck_err:
-       printbuf_exit(&buf2);
-       printbuf_exit(&buf1);
+       printbuf_exit(&buf);
        return ret;
 }
 
-static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b)
+static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct btree *b,
+                                             struct bpos *pulled_from_scan)
 {
        struct bch_fs *c = trans->c;
        struct btree_and_journal_iter iter;
        struct bkey_s_c k;
        struct bkey_buf prev_k, cur_k;
        struct btree *prev = NULL, *cur = NULL;
-       bool have_child, dropped_children = false;
+       bool have_child, new_pass = false;
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
        if (!b->c.level)
                return 0;
-again:
-       prev = NULL;
-       have_child = dropped_children = false;
+
        bch2_bkey_buf_init(&prev_k);
        bch2_bkey_buf_init(&cur_k);
+again:
+       cur = prev = NULL;
+       have_child = new_pass = false;
        bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
        iter.prefetch = true;
 
@@ -415,9 +367,10 @@ again:
                                b->c.level - 1,
                                buf.buf)) {
                        bch2_btree_node_evict(trans, cur_k.k);
-                       ret = bch2_journal_key_delete(c, b->c.btree_id,
-                                                     b->c.level, cur_k.k->k.p);
                        cur = NULL;
+                       ret =   bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?:
+                               bch2_journal_key_delete(c, b->c.btree_id,
+                                                       b->c.level, cur_k.k->k.p);
                        if (ret)
                                break;
                        continue;
@@ -427,7 +380,23 @@ again:
                if (ret)
                        break;
 
-               ret = btree_repair_node_boundaries(c, b, prev, cur);
+               if (bch2_btree_node_is_stale(c, cur)) {
+                       bch_info(c, "btree node %s older than nodes found by scanning", buf.buf);
+                       six_unlock_read(&cur->c.lock);
+                       bch2_btree_node_evict(trans, cur_k.k);
+                       ret = bch2_journal_key_delete(c, b->c.btree_id,
+                                                     b->c.level, cur_k.k->k.p);
+                       cur = NULL;
+                       if (ret)
+                               break;
+                       continue;
+               }
+
+               ret = btree_check_node_boundaries(c, b, prev, cur, pulled_from_scan);
+               if (ret == DID_FILL_FROM_SCAN) {
+                       new_pass = true;
+                       ret = 0;
+               }
 
                if (ret == DROP_THIS_NODE) {
                        six_unlock_read(&cur->c.lock);
@@ -445,6 +414,7 @@ again:
                prev = NULL;
 
                if (ret == DROP_PREV_NODE) {
+                       bch_info(c, "dropped prev node");
                        bch2_btree_node_evict(trans, prev_k.k);
                        ret = bch2_journal_key_delete(c, b->c.btree_id,
                                                      b->c.level, prev_k.k->k.p);
@@ -452,8 +422,6 @@ again:
                                break;
 
                        bch2_btree_and_journal_iter_exit(&iter);
-                       bch2_bkey_buf_exit(&prev_k, c);
-                       bch2_bkey_buf_exit(&cur_k, c);
                        goto again;
                } else if (ret)
                        break;
@@ -465,7 +433,11 @@ again:
 
        if (!ret && !IS_ERR_OR_NULL(prev)) {
                BUG_ON(cur);
-               ret = btree_repair_node_end(c, b, prev);
+               ret = btree_repair_node_end(c, b, prev, pulled_from_scan);
+               if (ret == DID_FILL_FROM_SCAN) {
+                       new_pass = true;
+                       ret = 0;
+               }
        }
 
        if (!IS_ERR_OR_NULL(prev))
@@ -479,6 +451,10 @@ again:
                goto err;
 
        bch2_btree_and_journal_iter_exit(&iter);
+
+       if (new_pass)
+               goto again;
+
        bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
        iter.prefetch = true;
 
@@ -495,7 +471,7 @@ again:
                if (ret)
                        goto err;
 
-               ret = bch2_btree_repair_topology_recurse(trans, cur);
+               ret = bch2_btree_repair_topology_recurse(trans, cur, pulled_from_scan);
                six_unlock_read(&cur->c.lock);
                cur = NULL;
 
@@ -503,7 +479,7 @@ again:
                        bch2_btree_node_evict(trans, cur_k.k);
                        ret = bch2_journal_key_delete(c, b->c.btree_id,
                                                      b->c.level, cur_k.k->k.p);
-                       dropped_children = true;
+                       new_pass = true;
                }
 
                if (ret)
@@ -530,12 +506,14 @@ fsck_err:
                six_unlock_read(&cur->c.lock);
 
        bch2_btree_and_journal_iter_exit(&iter);
-       bch2_bkey_buf_exit(&prev_k, c);
-       bch2_bkey_buf_exit(&cur_k, c);
 
-       if (!ret && dropped_children)
+       if (!ret && new_pass)
                goto again;
 
+       BUG_ON(!ret && bch2_btree_node_check_topology(trans, b));
+
+       bch2_bkey_buf_exit(&prev_k, c);
+       bch2_bkey_buf_exit(&cur_k, c);
        printbuf_exit(&buf);
        return ret;
 }
@@ -543,32 +521,63 @@ fsck_err:
 int bch2_check_topology(struct bch_fs *c)
 {
        struct btree_trans *trans = bch2_trans_get(c);
-       struct btree *b;
-       unsigned i;
+       struct bpos pulled_from_scan = POS_MIN;
        int ret = 0;
 
-       for (i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
+       for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
                struct btree_root *r = bch2_btree_id_root(c, i);
+               bool reconstructed_root = false;
 
-               if (!r->alive)
-                       continue;
+               if (r->error) {
+                       ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+                       if (ret)
+                               break;
+reconstruct_root:
+                       bch_info(c, "btree root %s unreadable, must recover from scan", bch2_btree_id_str(i));
 
-               b = r->b;
-               if (btree_node_fake(b))
-                       continue;
+                       r->alive = false;
+                       r->error = 0;
+
+                       if (!bch2_btree_has_scanned_nodes(c, i)) {
+                               mustfix_fsck_err(c, btree_root_unreadable_and_scan_found_nothing,
+                                                "no nodes found for btree %s, continue?", bch2_btree_id_str(i));
+                               bch2_btree_root_alloc_fake(c, i, 0);
+                       } else {
+                               bch2_btree_root_alloc_fake(c, i, 1);
+                               ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
+                               if (ret)
+                                       break;
+                       }
+
+                       bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+                       reconstructed_root = true;
+               }
+
+               struct btree *b = r->b;
 
                btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
-               ret = bch2_btree_repair_topology_recurse(trans, b);
+               ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan);
                six_unlock_read(&b->c.lock);
 
                if (ret == DROP_THIS_NODE) {
-                       bch_err(c, "empty btree root - repair unimplemented");
-                       ret = -BCH_ERR_fsck_repair_unimplemented;
+                       bch2_btree_node_hash_remove(&c->btree_cache, b);
+                       mutex_lock(&c->btree_cache.lock);
+                       list_move(&b->list, &c->btree_cache.freeable);
+                       mutex_unlock(&c->btree_cache.lock);
+
+                       r->b = NULL;
+
+                       if (!reconstructed_root)
+                               goto reconstruct_root;
+
+                       bch_err(c, "empty btree root %s", bch2_btree_id_str(i));
+                       bch2_btree_root_alloc_fake(c, i, 0);
+                       r->alive = false;
+                       ret = 0;
                }
        }
-
+fsck_err:
        bch2_trans_put(trans);
-
        return ret;
 }
 
@@ -591,7 +600,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
        bkey_for_each_ptr_decode(k->k, ptrs_c, p, entry_c) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
                struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
-               enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, &entry_c->ptr);
+               enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, p, entry_c);
 
                if (fsck_err_on(!g->gen_valid,
                                c, ptr_to_missing_alloc_key,
@@ -657,7 +666,8 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
                        continue;
 
                if (fsck_err_on(bucket_data_type(g->data_type) &&
-                               bucket_data_type(g->data_type) != data_type, c,
+                               bucket_data_type(g->data_type) !=
+                               bucket_data_type(data_type), c,
                                ptr_bucket_data_type_mismatch,
                                "bucket %u:%zu different types of data in same bucket: %s, %s\n"
                                "while marking %s",
@@ -698,18 +708,13 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
        }
 
        if (do_update) {
-               struct bkey_ptrs ptrs;
-               union bch_extent_entry *entry;
-               struct bch_extent_ptr *ptr;
-               struct bkey_i *new;
-
                if (is_root) {
                        bch_err(c, "cannot update btree roots yet");
                        ret = -EINVAL;
                        goto err;
                }
 
-               new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
+               struct bkey_i *new = kmalloc(bkey_bytes(k->k), GFP_KERNEL);
                if (!new) {
                        ret = -BCH_ERR_ENOMEM_gc_repair_key;
                        bch_err_msg(c, ret, "allocating new key");
@@ -724,7 +729,7 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
                         * btree node isn't there anymore, the read path will
                         * sort it out:
                         */
-                       ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+                       struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        bkey_for_each_ptr(ptrs, ptr) {
                                struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
                                struct bucket *g = PTR_GC_BUCKET(ca, ptr);
@@ -732,19 +737,26 @@ static int bch2_check_fix_ptrs(struct btree_trans *trans, enum btree_id btree_id
                                ptr->gen = g->gen;
                        }
                } else {
-                       bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, ({
-                               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-                               struct bucket *g = PTR_GC_BUCKET(ca, ptr);
-                               enum bch_data_type data_type = bch2_bkey_ptr_data_type(*k, ptr);
-
-                               (ptr->cached &&
-                                (!g->gen_valid || gen_cmp(ptr->gen, g->gen) > 0)) ||
-                               (!ptr->cached &&
-                                gen_cmp(ptr->gen, g->gen) < 0) ||
-                               gen_cmp(g->gen, ptr->gen) > BUCKET_GC_GEN_MAX ||
-                               (g->data_type &&
-                                g->data_type != data_type);
-                       }));
+                       struct bkey_ptrs ptrs;
+                       union bch_extent_entry *entry;
+restart_drop_ptrs:
+                       ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
+                       bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) {
+                               struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+                               struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
+                               enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry);
+
+                               if ((p.ptr.cached &&
+                                    (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) ||
+                                   (!p.ptr.cached &&
+                                    gen_cmp(p.ptr.gen, g->gen) < 0) ||
+                                   gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX ||
+                                   (g->data_type &&
+                                    g->data_type != data_type)) {
+                                       bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr);
+                                       goto restart_drop_ptrs;
+                               }
+                       }
 again:
                        ptrs = bch2_bkey_ptrs(bkey_i_to_s(new));
                        bkey_extent_entry_for_each(ptrs, entry) {
@@ -774,12 +786,6 @@ found:
                        }
                }
 
-               ret = bch2_journal_key_insert_take(c, btree_id, level, new);
-               if (ret) {
-                       kfree(new);
-                       goto err;
-               }
-
                if (level)
                        bch2_btree_node_update_key_early(trans, btree_id, level - 1, *k, new);
 
@@ -793,6 +799,12 @@ found:
                        bch_info(c, "new key %s", buf.buf);
                }
 
+               ret = bch2_journal_key_insert_take(c, btree_id, level, new);
+               if (ret) {
+                       kfree(new);
+                       goto err;
+               }
+
                *k = bkey_i_to_s_c(new);
        }
 err:
@@ -819,10 +831,6 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
                BUG_ON(bch2_journal_seq_verify &&
                       k->k->version.lo > atomic64_read(&c->journal.seq));
 
-               ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
-               if (ret)
-                       goto err;
-
                if (fsck_err_on(k->k->version.lo > atomic64_read(&c->key_version), c,
                                bkey_version_in_future,
                                "key version number higher than recorded: %llu > %llu",
@@ -831,8 +839,13 @@ static int bch2_gc_mark_key(struct btree_trans *trans, enum btree_id btree_id,
                        atomic64_set(&c->key_version, k->k->version.lo);
        }
 
+       ret = bch2_check_fix_ptrs(trans, btree_id, level, is_root, k);
+       if (ret)
+               goto err;
+
        ret = commit_do(trans, NULL, NULL, 0,
-                       bch2_key_trigger(trans, btree_id, level, old, unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
+                       bch2_key_trigger(trans, btree_id, level, old,
+                                        unsafe_bkey_s_c_to_s(*k), BTREE_TRIGGER_GC));
 fsck_err:
 err:
        bch_err_fn(c, ret);
@@ -841,42 +854,30 @@ err:
 
 static int btree_gc_mark_node(struct btree_trans *trans, struct btree *b, bool initial)
 {
-       struct bch_fs *c = trans->c;
        struct btree_node_iter iter;
        struct bkey unpacked;
        struct bkey_s_c k;
-       struct bkey_buf prev, cur;
        int ret = 0;
 
+       ret = bch2_btree_node_check_topology(trans, b);
+       if (ret)
+               return ret;
+
        if (!btree_node_type_needs_gc(btree_node_type(b)))
                return 0;
 
        bch2_btree_node_iter_init_from_start(&iter, b);
-       bch2_bkey_buf_init(&prev);
-       bch2_bkey_buf_init(&cur);
-       bkey_init(&prev.k->k);
 
        while ((k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked)).k) {
                ret = bch2_gc_mark_key(trans, b->c.btree_id, b->c.level, false,
                                       &k, initial);
                if (ret)
-                       break;
+                       return ret;
 
                bch2_btree_node_iter_advance(&iter, b);
-
-               if (b->c.level) {
-                       bch2_bkey_buf_reassemble(&cur, c, k);
-
-                       ret = bch2_gc_check_topology(c, b, &prev, cur,
-                                       bch2_btree_node_iter_end(&iter));
-                       if (ret)
-                               break;
-               }
        }
 
-       bch2_bkey_buf_exit(&cur, c);
-       bch2_bkey_buf_exit(&prev, c);
-       return ret;
+       return 0;
 }
 
 static int bch2_gc_btree(struct btree_trans *trans, enum btree_id btree_id,
@@ -925,14 +926,16 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
        struct bch_fs *c = trans->c;
        struct btree_and_journal_iter iter;
        struct bkey_s_c k;
-       struct bkey_buf cur, prev;
+       struct bkey_buf cur;
        struct printbuf buf = PRINTBUF;
        int ret = 0;
 
+       ret = bch2_btree_node_check_topology(trans, b);
+       if (ret)
+               return ret;
+
        bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
-       bch2_bkey_buf_init(&prev);
        bch2_bkey_buf_init(&cur);
-       bkey_init(&prev.k->k);
 
        while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                BUG_ON(bpos_lt(k.k->p, b->data->min_key));
@@ -943,20 +946,7 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
                if (ret)
                        goto fsck_err;
 
-               if (b->c.level) {
-                       bch2_bkey_buf_reassemble(&cur, c, k);
-                       k = bkey_i_to_s_c(cur.k);
-
-                       bch2_btree_and_journal_iter_advance(&iter);
-
-                       ret = bch2_gc_check_topology(c, b,
-                                       &prev, cur,
-                                       !bch2_btree_and_journal_iter_peek(&iter).k);
-                       if (ret)
-                               goto fsck_err;
-               } else {
-                       bch2_btree_and_journal_iter_advance(&iter);
-               }
+               bch2_btree_and_journal_iter_advance(&iter);
        }
 
        if (b->c.level > target_depth) {
@@ -1015,7 +1005,6 @@ static int bch2_gc_btree_init_recurse(struct btree_trans *trans, struct btree *b
        }
 fsck_err:
        bch2_bkey_buf_exit(&cur, c);
-       bch2_bkey_buf_exit(&prev, c);
        bch2_btree_and_journal_iter_exit(&iter);
        printbuf_exit(&buf);
        return ret;
@@ -1033,9 +1022,6 @@ static int bch2_gc_btree_init(struct btree_trans *trans,
 
        b = bch2_btree_id_root(c, btree_id)->b;
 
-       if (btree_node_fake(b))
-               return 0;
-
        six_lock_read(&b->c.lock, NULL, NULL);
        printbuf_reset(&buf);
        bch2_bpos_to_text(&buf, b->data->min_key);
index 34df8ccc5fecc2bfbad874e77f53fa1f5f068251..d7de82ac389354f9a0d5eef0a66c8694f1752b94 100644 (file)
@@ -654,6 +654,7 @@ void bch2_btree_node_drop_keys_outside_node(struct btree *b)
         */
        bch2_bset_set_no_aux_tree(b, b->set);
        bch2_btree_build_aux_trees(b);
+       b->nr = bch2_btree_node_count_keys(b);
 
        struct bkey_s_c k;
        struct bkey unpacked;
@@ -1263,10 +1264,12 @@ out:
        return retry_read;
 fsck_err:
        if (ret == -BCH_ERR_btree_node_read_err_want_retry ||
-           ret == -BCH_ERR_btree_node_read_err_must_retry)
+           ret == -BCH_ERR_btree_node_read_err_must_retry) {
                retry_read = 1;
-       else
+       } else {
                set_btree_node_read_error(b);
+               bch2_btree_lost_data(c, b->c.btree_id);
+       }
        goto out;
 }
 
@@ -1327,6 +1330,7 @@ start:
 
                if (!can_retry) {
                        set_btree_node_read_error(b);
+                       bch2_btree_lost_data(c, b->c.btree_id);
                        break;
                }
        }
@@ -1526,9 +1530,10 @@ fsck_err:
                ret = -1;
        }
 
-       if (ret)
+       if (ret) {
                set_btree_node_read_error(b);
-       else if (*saw_error)
+               bch2_btree_lost_data(c, b->c.btree_id);
+       } else if (*saw_error)
                bch2_btree_node_rewrite_async(c, b);
 
        for (i = 0; i < ra->nr; i++) {
@@ -1657,13 +1662,14 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 
                prt_str(&buf, "btree node read error: no device to read from\n at ");
                bch2_btree_pos_to_text(&buf, c, b);
-               bch_err(c, "%s", buf.buf);
+               bch_err_ratelimited(c, "%s", buf.buf);
 
                if (c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology) &&
                    c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology)
                        bch2_fatal_error(c);
 
                set_btree_node_read_error(b);
+               bch2_btree_lost_data(c, b->c.btree_id);
                clear_btree_node_read_in_flight(b);
                wake_up_bit(&b->flags, BTREE_NODE_read_in_flight);
                printbuf_exit(&buf);
@@ -1860,7 +1866,7 @@ static void btree_node_write_work(struct work_struct *work)
        } else {
                ret = bch2_trans_do(c, NULL, NULL, 0,
                        bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
-                                       BCH_WATERMARK_reclaim|
+                                       BCH_WATERMARK_interior_updates|
                                        BCH_TRANS_COMMIT_journal_reclaim|
                                        BCH_TRANS_COMMIT_no_enospc|
                                        BCH_TRANS_COMMIT_no_check_rw,
index 51bcdc6c6d1cda83be21b43b54d0d11f320a0471..2a211a4bebd153daa57ac049c3a01fbffcb424f1 100644 (file)
@@ -927,8 +927,22 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
                if (ret)
                        goto err;
        } else {
-               bch2_bkey_buf_unpack(&tmp, c, l->b,
-                                bch2_btree_node_iter_peek(&l->iter, l->b));
+               struct bkey_packed *k = bch2_btree_node_iter_peek(&l->iter, l->b);
+               if (!k) {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "node not found at pos ");
+                       bch2_bpos_to_text(&buf, path->pos);
+                       prt_str(&buf, " within parent node ");
+                       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&l->b->key));
+
+                       bch2_fs_fatal_error(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+                       ret = -BCH_ERR_btree_need_topology_repair;
+                       goto err;
+               }
+
+               bch2_bkey_buf_unpack(&tmp, c, l->b, k);
 
                if ((flags & BTREE_ITER_PREFETCH) &&
                    c->opts.btree_node_prefetch) {
@@ -962,7 +976,6 @@ err:
        return ret;
 }
 
-
 static int bch2_btree_path_traverse_all(struct btree_trans *trans)
 {
        struct bch_fs *c = trans->c;
@@ -2790,6 +2803,31 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
        struct btree_transaction_stats *s = btree_trans_stats(trans);
        s->max_mem = max(s->max_mem, new_bytes);
 
+       if (trans->used_mempool) {
+               if (trans->mem_bytes >= new_bytes)
+                       goto out_change_top;
+
+               /* No more space from mempool item, need malloc new one */
+               new_mem = kmalloc(new_bytes, GFP_NOWAIT|__GFP_NOWARN);
+               if (unlikely(!new_mem)) {
+                       bch2_trans_unlock(trans);
+
+                       new_mem = kmalloc(new_bytes, GFP_KERNEL);
+                       if (!new_mem)
+                               return ERR_PTR(-BCH_ERR_ENOMEM_trans_kmalloc);
+
+                       ret = bch2_trans_relock(trans);
+                       if (ret) {
+                               kfree(new_mem);
+                               return ERR_PTR(ret);
+                       }
+               }
+               memcpy(new_mem, trans->mem, trans->mem_top);
+               trans->used_mempool = false;
+               mempool_free(trans->mem, &c->btree_trans_mem_pool);
+               goto out_new_mem;
+       }
+
        new_mem = krealloc(trans->mem, new_bytes, GFP_NOWAIT|__GFP_NOWARN);
        if (unlikely(!new_mem)) {
                bch2_trans_unlock(trans);
@@ -2798,6 +2836,8 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
                if (!new_mem && new_bytes <= BTREE_TRANS_MEM_MAX) {
                        new_mem = mempool_alloc(&c->btree_trans_mem_pool, GFP_KERNEL);
                        new_bytes = BTREE_TRANS_MEM_MAX;
+                       memcpy(new_mem, trans->mem, trans->mem_top);
+                       trans->used_mempool = true;
                        kfree(trans->mem);
                }
 
@@ -2811,7 +2851,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
                if (ret)
                        return ERR_PTR(ret);
        }
-
+out_new_mem:
        trans->mem = new_mem;
        trans->mem_bytes = new_bytes;
 
@@ -2819,7 +2859,7 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
                trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
                return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
        }
-
+out_change_top:
        p = trans->mem + trans->mem_top;
        trans->mem_top += size;
        memset(p, 0, size);
@@ -3093,7 +3133,7 @@ void bch2_trans_put(struct btree_trans *trans)
        if (paths_allocated != trans->_paths_allocated)
                kvfree_rcu_mightsleep(paths_allocated);
 
-       if (trans->mem_bytes == BTREE_TRANS_MEM_MAX)
+       if (trans->used_mempool)
                mempool_free(trans->mem, &c->btree_trans_mem_pool);
        else
                kfree(trans->mem);
index 50e04356d72c8ea0f920545561b4797875afc03f..5cbcbfe85235b8de3777ae82b120d4627f99c8d7 100644 (file)
@@ -261,6 +261,22 @@ int bch2_journal_key_delete(struct bch_fs *c, enum btree_id id,
        return bch2_journal_key_insert(c, id, level, &whiteout);
 }
 
+bool bch2_key_deleted_in_journal(struct btree_trans *trans, enum btree_id btree,
+                                unsigned level, struct bpos pos)
+{
+       struct journal_keys *keys = &trans->c->journal_keys;
+       size_t idx = bch2_journal_key_search(keys, btree, level, pos);
+
+       if (!trans->journal_replay_not_finished)
+               return false;
+
+       return (idx < keys->size &&
+               keys->data[idx].btree_id        == btree &&
+               keys->data[idx].level           == level &&
+               bpos_eq(keys->data[idx].k->k.p, pos) &&
+               bkey_deleted(&keys->data[idx].k->k));
+}
+
 void bch2_journal_key_overwritten(struct bch_fs *c, enum btree_id btree,
                                  unsigned level, struct bpos pos)
 {
@@ -363,7 +379,7 @@ static void btree_and_journal_iter_prefetch(struct btree_and_journal_iter *_iter
 
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *iter)
 {
-       struct bkey_s_c btree_k, journal_k, ret;
+       struct bkey_s_c btree_k, journal_k = bkey_s_c_null, ret;
 
        if (iter->prefetch && iter->journal.level)
                btree_and_journal_iter_prefetch(iter);
@@ -375,9 +391,10 @@ again:
               bpos_lt(btree_k.k->p, iter->pos))
                bch2_journal_iter_advance_btree(iter);
 
-       while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
-              bpos_lt(journal_k.k->p, iter->pos))
-               bch2_journal_iter_advance(&iter->journal);
+       if (iter->trans->journal_replay_not_finished)
+               while ((journal_k = bch2_journal_iter_peek(&iter->journal)).k &&
+                      bpos_lt(journal_k.k->p, iter->pos))
+                       bch2_journal_iter_advance(&iter->journal);
 
        ret = journal_k.k &&
                (!btree_k.k || bpos_le(journal_k.k->p, btree_k.k->p))
@@ -435,7 +452,9 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
 
        bch2_btree_node_iter_init_from_start(&node_iter, b);
        __bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
-       list_add(&iter->journal.list, &trans->c->journal_iters);
+       if (trans->journal_replay_not_finished &&
+           !test_bit(BCH_FS_may_go_rw, &trans->c->flags))
+               list_add(&iter->journal.list, &trans->c->journal_iters);
 }
 
 /* sort and dedup all keys in the journal: */
@@ -548,3 +567,22 @@ int bch2_journal_keys_sort(struct bch_fs *c)
        bch_verbose(c, "Journal keys: %zu read, %zu after sorting and compacting", nr_read, keys->nr);
        return 0;
 }
+
+void bch2_shoot_down_journal_keys(struct bch_fs *c, enum btree_id btree,
+                                 unsigned level_min, unsigned level_max,
+                                 struct bpos start, struct bpos end)
+{
+       struct journal_keys *keys = &c->journal_keys;
+       size_t dst = 0;
+
+       move_gap(keys, keys->nr);
+
+       darray_for_each(*keys, i)
+               if (!(i->btree_id == btree &&
+                     i->level >= level_min &&
+                     i->level <= level_max &&
+                     bpos_ge(i->k->k.p, start) &&
+                     bpos_le(i->k->k.p, end)))
+                       keys->data[dst++] = *i;
+       keys->nr = keys->gap = dst;
+}
index c9d19da3ea04803a360a683fa0e01a2838f2433f..af25046ebcaa763bd7e7b8c37e34818ea06c5cc2 100644 (file)
@@ -40,8 +40,8 @@ int bch2_journal_key_insert(struct bch_fs *, enum btree_id,
                            unsigned, struct bkey_i *);
 int bch2_journal_key_delete(struct bch_fs *, enum btree_id,
                            unsigned, struct bpos);
-void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id,
-                                 unsigned, struct bpos);
+bool bch2_key_deleted_in_journal(struct btree_trans *, enum btree_id, unsigned, struct bpos);
+void bch2_journal_key_overwritten(struct bch_fs *, enum btree_id, unsigned, struct bpos);
 
 void bch2_btree_and_journal_iter_advance(struct btree_and_journal_iter *);
 struct bkey_s_c bch2_btree_and_journal_iter_peek(struct btree_and_journal_iter *);
@@ -66,4 +66,8 @@ void bch2_journal_entries_free(struct bch_fs *);
 
 int bch2_journal_keys_sort(struct bch_fs *);
 
+void bch2_shoot_down_journal_keys(struct bch_fs *, enum btree_id,
+                                 unsigned, unsigned,
+                                 struct bpos, struct bpos);
+
 #endif /* _BCACHEFS_BTREE_JOURNAL_ITER_H */
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
new file mode 100644 (file)
index 0000000..3f33be7
--- /dev/null
@@ -0,0 +1,495 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "btree_cache.h"
+#include "btree_io.h"
+#include "btree_journal_iter.h"
+#include "btree_node_scan.h"
+#include "btree_update_interior.h"
+#include "buckets.h"
+#include "error.h"
+#include "journal_io.h"
+#include "recovery_passes.h"
+
+#include <linux/kthread.h>
+#include <linux/sort.h>
+
+struct find_btree_nodes_worker {
+       struct closure          *cl;
+       struct find_btree_nodes *f;
+       struct bch_dev          *ca;
+};
+
+static void found_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct found_btree_node *n)
+{
+       prt_printf(out, "%s l=%u seq=%u cookie=%llx ", bch2_btree_id_str(n->btree_id), n->level, n->seq, n->cookie);
+       bch2_bpos_to_text(out, n->min_key);
+       prt_str(out, "-");
+       bch2_bpos_to_text(out, n->max_key);
+
+       if (n->range_updated)
+               prt_str(out, " range updated");
+       if (n->overwritten)
+               prt_str(out, " overwritten");
+
+       for (unsigned i = 0; i < n->nr_ptrs; i++) {
+               prt_char(out, ' ');
+               bch2_extent_ptr_to_text(out, c, n->ptrs + i);
+       }
+}
+
+static void found_btree_nodes_to_text(struct printbuf *out, struct bch_fs *c, found_btree_nodes nodes)
+{
+       printbuf_indent_add(out, 2);
+       darray_for_each(nodes, i) {
+               found_btree_node_to_text(out, c, i);
+               prt_newline(out);
+       }
+       printbuf_indent_sub(out, 2);
+}
+
+static void found_btree_node_to_key(struct bkey_i *k, const struct found_btree_node *f)
+{
+       struct bkey_i_btree_ptr_v2 *bp = bkey_btree_ptr_v2_init(k);
+
+       set_bkey_val_u64s(&bp->k, sizeof(struct bch_btree_ptr_v2) / sizeof(u64) + f->nr_ptrs);
+       bp->k.p                 = f->max_key;
+       bp->v.seq               = cpu_to_le64(f->cookie);
+       bp->v.sectors_written   = 0;
+       bp->v.flags             = 0;
+       bp->v.min_key           = f->min_key;
+       SET_BTREE_PTR_RANGE_UPDATED(&bp->v, f->range_updated);
+       memcpy(bp->v.start, f->ptrs, sizeof(struct bch_extent_ptr) * f->nr_ptrs);
+}
+
+static bool found_btree_node_is_readable(struct btree_trans *trans,
+                                        const struct found_btree_node *f)
+{
+       struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } k;
+
+       found_btree_node_to_key(&k.k, f);
+
+       struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
+       bool ret = !IS_ERR_OR_NULL(b);
+       if (ret)
+               six_unlock_read(&b->c.lock);
+
+       /*
+        * We might update this node's range; if that happens, we need the node
+        * to be re-read so the read path can trim keys that are no longer in
+        * this node
+        */
+       if (b != btree_node_root(trans->c, b))
+               bch2_btree_node_evict(trans, &k.k);
+       return ret;
+}
+
+static int found_btree_node_cmp_cookie(const void *_l, const void *_r)
+{
+       const struct found_btree_node *l = _l;
+       const struct found_btree_node *r = _r;
+
+       return  cmp_int(l->btree_id,    r->btree_id) ?:
+               cmp_int(l->level,       r->level) ?:
+               cmp_int(l->cookie,      r->cookie);
+}
+
+/*
+ * Given two found btree nodes, if their sequence numbers are equal, take the
+ * one that's readable:
+ */
+static int found_btree_node_cmp_time(const struct found_btree_node *l,
+                                    const struct found_btree_node *r)
+{
+       return cmp_int(l->seq, r->seq);
+}
+
+static int found_btree_node_cmp_pos(const void *_l, const void *_r)
+{
+       const struct found_btree_node *l = _l;
+       const struct found_btree_node *r = _r;
+
+       return  cmp_int(l->btree_id,    r->btree_id) ?:
+              -cmp_int(l->level,       r->level) ?:
+               bpos_cmp(l->min_key,    r->min_key) ?:
+              -found_btree_node_cmp_time(l, r);
+}
+
+static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
+                               struct bio *bio, struct btree_node *bn, u64 offset)
+{
+       struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+
+       bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ);
+       bio->bi_iter.bi_sector  = offset;
+       bch2_bio_map(bio, bn, PAGE_SIZE);
+
+       submit_bio_wait(bio);
+       if (bch2_dev_io_err_on(bio->bi_status, ca, BCH_MEMBER_ERROR_read,
+                              "IO error in try_read_btree_node() at %llu: %s",
+                              offset, bch2_blk_status_to_str(bio->bi_status)))
+               return;
+
+       if (le64_to_cpu(bn->magic) != bset_magic(c))
+               return;
+
+       rcu_read_lock();
+       struct found_btree_node n = {
+               .btree_id       = BTREE_NODE_ID(bn),
+               .level          = BTREE_NODE_LEVEL(bn),
+               .seq            = BTREE_NODE_SEQ(bn),
+               .cookie         = le64_to_cpu(bn->keys.seq),
+               .min_key        = bn->min_key,
+               .max_key        = bn->max_key,
+               .nr_ptrs        = 1,
+               .ptrs[0].type   = 1 << BCH_EXTENT_ENTRY_ptr,
+               .ptrs[0].offset = offset,
+               .ptrs[0].dev    = ca->dev_idx,
+               .ptrs[0].gen    = *bucket_gen(ca, sector_to_bucket(ca, offset)),
+       };
+       rcu_read_unlock();
+
+       if (bch2_trans_run(c, found_btree_node_is_readable(trans, &n))) {
+               mutex_lock(&f->lock);
+               if (BSET_BIG_ENDIAN(&bn->keys) != CPU_BIG_ENDIAN) {
+                       bch_err(c, "try_read_btree_node() can't handle endian conversion");
+                       f->ret = -EINVAL;
+                       goto unlock;
+               }
+
+               if (darray_push(&f->nodes, n))
+                       f->ret = -ENOMEM;
+unlock:
+               mutex_unlock(&f->lock);
+       }
+}
+
+static int read_btree_nodes_worker(void *p)
+{
+       struct find_btree_nodes_worker *w = p;
+       struct bch_fs *c = container_of(w->f, struct bch_fs, found_btree_nodes);
+       struct bch_dev *ca = w->ca;
+       void *buf = (void *) __get_free_page(GFP_KERNEL);
+       struct bio *bio = bio_alloc(NULL, 1, 0, GFP_KERNEL);
+       unsigned long last_print = jiffies;
+
+       if (!buf || !bio) {
+               bch_err(c, "read_btree_nodes_worker: error allocating bio/buf");
+               w->f->ret = -ENOMEM;
+               goto err;
+       }
+
+       for (u64 bucket = ca->mi.first_bucket; bucket < ca->mi.nbuckets; bucket++)
+               for (unsigned bucket_offset = 0;
+                    bucket_offset + btree_sectors(c) <= ca->mi.bucket_size;
+                    bucket_offset += btree_sectors(c)) {
+                       if (time_after(jiffies, last_print + HZ * 30)) {
+                               u64 cur_sector = bucket * ca->mi.bucket_size + bucket_offset;
+                               u64 end_sector = ca->mi.nbuckets * ca->mi.bucket_size;
+
+                               bch_info(ca, "%s: %2u%% done", __func__,
+                                        (unsigned) div64_u64(cur_sector * 100, end_sector));
+                               last_print = jiffies;
+                       }
+
+                       try_read_btree_node(w->f, ca, bio, buf,
+                                           bucket * ca->mi.bucket_size + bucket_offset);
+               }
+err:
+       bio_put(bio);
+       free_page((unsigned long) buf);
+       percpu_ref_get(&ca->io_ref);
+       closure_put(w->cl);
+       kfree(w);
+       return 0;
+}
+
+static int read_btree_nodes(struct find_btree_nodes *f)
+{
+       struct bch_fs *c = container_of(f, struct bch_fs, found_btree_nodes);
+       struct closure cl;
+       int ret = 0;
+
+       closure_init_stack(&cl);
+
+       for_each_online_member(c, ca) {
+               struct find_btree_nodes_worker *w = kmalloc(sizeof(*w), GFP_KERNEL);
+               struct task_struct *t;
+
+               if (!w) {
+                       percpu_ref_put(&ca->io_ref);
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               percpu_ref_get(&ca->io_ref);
+               closure_get(&cl);
+               w->cl           = &cl;
+               w->f            = f;
+               w->ca           = ca;
+
+               t = kthread_run(read_btree_nodes_worker, w, "read_btree_nodes/%s", ca->name);
+               ret = IS_ERR_OR_NULL(t);
+               if (ret) {
+                       percpu_ref_put(&ca->io_ref);
+                       closure_put(&cl);
+                       f->ret = ret;
+                       bch_err(c, "error starting kthread: %i", ret);
+                       break;
+               }
+       }
+err:
+       closure_sync(&cl);
+       return f->ret ?: ret;
+}
+
+static void bubble_up(struct found_btree_node *n, struct found_btree_node *end)
+{
+       while (n + 1 < end &&
+              found_btree_node_cmp_pos(n, n + 1) > 0) {
+               swap(n[0], n[1]);
+               n++;
+       }
+}
+
+static int handle_overwrites(struct bch_fs *c,
+                            struct found_btree_node *start,
+                            struct found_btree_node *end)
+{
+       struct found_btree_node *n;
+again:
+       for (n = start + 1;
+            n < end &&
+            n->btree_id        == start->btree_id &&
+            n->level           == start->level &&
+            bpos_lt(n->min_key, start->max_key);
+            n++)  {
+               int cmp = found_btree_node_cmp_time(start, n);
+
+               if (cmp > 0) {
+                       if (bpos_cmp(start->max_key, n->max_key) >= 0)
+                               n->overwritten = true;
+                       else {
+                               n->range_updated = true;
+                               n->min_key = bpos_successor(start->max_key);
+                               n->range_updated = true;
+                               bubble_up(n, end);
+                               goto again;
+                       }
+               } else if (cmp < 0) {
+                       BUG_ON(bpos_cmp(n->min_key, start->min_key) <= 0);
+
+                       start->max_key = bpos_predecessor(n->min_key);
+                       start->range_updated = true;
+               } else {
+                       struct printbuf buf = PRINTBUF;
+
+                       prt_str(&buf, "overlapping btree nodes with same seq! halting\n  ");
+                       found_btree_node_to_text(&buf, c, start);
+                       prt_str(&buf, "\n  ");
+                       found_btree_node_to_text(&buf, c, n);
+                       bch_err(c, "%s", buf.buf);
+                       printbuf_exit(&buf);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+int bch2_scan_for_btree_nodes(struct bch_fs *c)
+{
+       struct find_btree_nodes *f = &c->found_btree_nodes;
+       struct printbuf buf = PRINTBUF;
+       size_t dst;
+       int ret = 0;
+
+       if (f->nodes.nr)
+               return 0;
+
+       mutex_init(&f->lock);
+
+       ret = read_btree_nodes(f);
+       if (ret)
+               return ret;
+
+       if (!f->nodes.nr) {
+               bch_err(c, "%s: no btree nodes found", __func__);
+               ret = -EINVAL;
+               goto err;
+       }
+
+       if (0 && c->opts.verbose) {
+               printbuf_reset(&buf);
+               prt_printf(&buf, "%s: nodes found:\n", __func__);
+               found_btree_nodes_to_text(&buf, c, f->nodes);
+               bch2_print_string_as_lines(KERN_INFO, buf.buf);
+       }
+
+       sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_cookie, NULL);
+
+       dst = 0;
+       darray_for_each(f->nodes, i) {
+               struct found_btree_node *prev = dst ? f->nodes.data + dst - 1 : NULL;
+
+               if (prev &&
+                   prev->cookie == i->cookie) {
+                       if (prev->nr_ptrs == ARRAY_SIZE(prev->ptrs)) {
+                               bch_err(c, "%s: found too many replicas for btree node", __func__);
+                               ret = -EINVAL;
+                               goto err;
+                       }
+                       prev->ptrs[prev->nr_ptrs++] = i->ptrs[0];
+               } else {
+                       f->nodes.data[dst++] = *i;
+               }
+       }
+       f->nodes.nr = dst;
+
+       sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
+
+       if (0 && c->opts.verbose) {
+               printbuf_reset(&buf);
+               prt_printf(&buf, "%s: nodes after merging replicas:\n", __func__);
+               found_btree_nodes_to_text(&buf, c, f->nodes);
+               bch2_print_string_as_lines(KERN_INFO, buf.buf);
+       }
+
+       dst = 0;
+       darray_for_each(f->nodes, i) {
+               if (i->overwritten)
+                       continue;
+
+               ret = handle_overwrites(c, i, &darray_top(f->nodes));
+               if (ret)
+                       goto err;
+
+               BUG_ON(i->overwritten);
+               f->nodes.data[dst++] = *i;
+       }
+       f->nodes.nr = dst;
+
+       if (c->opts.verbose) {
+               printbuf_reset(&buf);
+               prt_printf(&buf, "%s: nodes found after overwrites:\n", __func__);
+               found_btree_nodes_to_text(&buf, c, f->nodes);
+               bch2_print_string_as_lines(KERN_INFO, buf.buf);
+       }
+
+       eytzinger0_sort(f->nodes.data, f->nodes.nr, sizeof(f->nodes.data[0]), found_btree_node_cmp_pos, NULL);
+err:
+       printbuf_exit(&buf);
+       return ret;
+}
+
+static int found_btree_node_range_start_cmp(const void *_l, const void *_r)
+{
+       const struct found_btree_node *l = _l;
+       const struct found_btree_node *r = _r;
+
+       return  cmp_int(l->btree_id,    r->btree_id) ?:
+              -cmp_int(l->level,       r->level) ?:
+               bpos_cmp(l->max_key,    r->min_key);
+}
+
+#define for_each_found_btree_node_in_range(_f, _search, _idx)                          \
+       for (size_t _idx = eytzinger0_find_gt((_f)->nodes.data, (_f)->nodes.nr,         \
+                                       sizeof((_f)->nodes.data[0]),                    \
+                                       found_btree_node_range_start_cmp, &search);     \
+            _idx < (_f)->nodes.nr &&                                                   \
+            (_f)->nodes.data[_idx].btree_id == _search.btree_id &&                     \
+            (_f)->nodes.data[_idx].level == _search.level &&                           \
+            bpos_lt((_f)->nodes.data[_idx].min_key, _search.max_key);                  \
+            _idx = eytzinger0_next(_idx, (_f)->nodes.nr))
+
+bool bch2_btree_node_is_stale(struct bch_fs *c, struct btree *b)
+{
+       struct find_btree_nodes *f = &c->found_btree_nodes;
+
+       struct found_btree_node search = {
+               .btree_id       = b->c.btree_id,
+               .level          = b->c.level,
+               .min_key        = b->data->min_key,
+               .max_key        = b->key.k.p,
+       };
+
+       for_each_found_btree_node_in_range(f, search, idx)
+               if (f->nodes.data[idx].seq > BTREE_NODE_SEQ(b->data))
+                       return true;
+       return false;
+}
+
+bool bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
+{
+       struct found_btree_node search = {
+               .btree_id       = btree,
+               .level          = 0,
+               .min_key        = POS_MIN,
+               .max_key        = SPOS_MAX,
+       };
+
+       for_each_found_btree_node_in_range(&c->found_btree_nodes, search, idx)
+               return true;
+       return false;
+}
+
+int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
+                          unsigned level, struct bpos node_min, struct bpos node_max)
+{
+       struct find_btree_nodes *f = &c->found_btree_nodes;
+
+       int ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
+       if (ret)
+               return ret;
+
+       if (c->opts.verbose) {
+               struct printbuf buf = PRINTBUF;
+
+               prt_printf(&buf, "recovering %s l=%u ", bch2_btree_id_str(btree), level);
+               bch2_bpos_to_text(&buf, node_min);
+               prt_str(&buf, " - ");
+               bch2_bpos_to_text(&buf, node_max);
+
+               bch_info(c, "%s(): %s", __func__, buf.buf);
+               printbuf_exit(&buf);
+       }
+
+       struct found_btree_node search = {
+               .btree_id       = btree,
+               .level          = level,
+               .min_key        = node_min,
+               .max_key        = node_max,
+       };
+
+       for_each_found_btree_node_in_range(f, search, idx) {
+               struct found_btree_node n = f->nodes.data[idx];
+
+               n.range_updated |= bpos_lt(n.min_key, node_min);
+               n.min_key = bpos_max(n.min_key, node_min);
+
+               n.range_updated |= bpos_gt(n.max_key, node_max);
+               n.max_key = bpos_min(n.max_key, node_max);
+
+               struct { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX); } tmp;
+
+               found_btree_node_to_key(&tmp.k, &n);
+
+               struct printbuf buf = PRINTBUF;
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&tmp.k));
+               bch_verbose(c, "%s(): recovering %s", __func__, buf.buf);
+               printbuf_exit(&buf);
+
+               BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL));
+
+               ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+void bch2_find_btree_nodes_exit(struct find_btree_nodes *f)
+{
+       darray_exit(&f->nodes);
+}
diff --git a/fs/bcachefs/btree_node_scan.h b/fs/bcachefs/btree_node_scan.h
new file mode 100644 (file)
index 0000000..08687b2
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_NODE_SCAN_H
+#define _BCACHEFS_BTREE_NODE_SCAN_H
+
+int bch2_scan_for_btree_nodes(struct bch_fs *);
+bool bch2_btree_node_is_stale(struct bch_fs *, struct btree *);
+bool bch2_btree_has_scanned_nodes(struct bch_fs *, enum btree_id);
+int bch2_get_scanned_nodes(struct bch_fs *, enum btree_id, unsigned, struct bpos, struct bpos);
+void bch2_find_btree_nodes_exit(struct find_btree_nodes *);
+
+#endif /* _BCACHEFS_BTREE_NODE_SCAN_H */
diff --git a/fs/bcachefs/btree_node_scan_types.h b/fs/bcachefs/btree_node_scan_types.h
new file mode 100644 (file)
index 0000000..abb7b27
--- /dev/null
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
+#define _BCACHEFS_BTREE_NODE_SCAN_TYPES_H
+
+#include "darray.h"
+
+struct found_btree_node {
+       bool                    range_updated:1;
+       bool                    overwritten:1;
+       u8                      btree_id;
+       u8                      level;
+       u32                     seq;
+       u64                     cookie;
+
+       struct bpos             min_key;
+       struct bpos             max_key;
+
+       unsigned                nr_ptrs;
+       struct bch_extent_ptr   ptrs[BCH_REPLICAS_MAX];
+};
+
+typedef DARRAY(struct found_btree_node)        found_btree_nodes;
+
+struct find_btree_nodes {
+       int                     ret;
+       struct mutex            lock;
+       found_btree_nodes       nodes;
+};
+
+#endif /* _BCACHEFS_BTREE_NODE_SCAN_TYPES_H */
index 30d69a6d133eec77c76c7e64a5de0d896ad6b732..aa9da49707404015a558c9c6e9339b733d0c98c3 100644 (file)
@@ -318,7 +318,7 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
                !(i->flags & BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) &&
                test_bit(JOURNAL_REPLAY_DONE, &trans->c->journal.flags) &&
                i->k->k.p.snapshot &&
-               bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot));
+               bch2_snapshot_is_internal_node(trans->c, i->k->k.p.snapshot) > 0);
 }
 
 static __always_inline int bch2_trans_journal_res_get(struct btree_trans *trans,
@@ -887,6 +887,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
                            int ret, unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
+       enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
 
        switch (ret) {
        case -BCH_ERR_btree_insert_btree_node_full:
@@ -905,7 +906,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
                 * flag
                 */
                if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-                   (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
+                   watermark < BCH_WATERMARK_reclaim) {
                        ret = -BCH_ERR_journal_reclaim_would_deadlock;
                        break;
                }
index a4b40c1656a54b0a13c9f562d337827387a44b15..8e47e260eba59b09097fd07b1d9063a0e665a812 100644 (file)
@@ -38,6 +38,9 @@ static noinline int extent_front_merge(struct btree_trans *trans,
        struct bkey_i *update;
        int ret;
 
+       if (unlikely(trans->journal_replay_not_finished))
+               return 0;
+
        update = bch2_bkey_make_mut_noupdate(trans, k);
        ret = PTR_ERR_OR_ZERO(update);
        if (ret)
@@ -69,6 +72,9 @@ static noinline int extent_back_merge(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        int ret;
 
+       if (unlikely(trans->journal_replay_not_finished))
+               return 0;
+
        ret =   bch2_key_has_snapshot_overwrites(trans, iter->btree_id, insert->k.p) ?:
                bch2_key_has_snapshot_overwrites(trans, iter->btree_id, k.k->p);
        if (ret < 0)
index b2f5f2e50f7e19ccd59502a1471ee9bb6d14a988..32397b99752fd2ec3cfd553724c97c7f217ca56e 100644 (file)
@@ -2,6 +2,7 @@
 
 #include "bcachefs.h"
 #include "alloc_foreground.h"
+#include "bkey_buf.h"
 #include "bkey_methods.h"
 #include "btree_cache.h"
 #include "btree_gc.h"
 #include "journal.h"
 #include "journal_reclaim.h"
 #include "keylist.h"
+#include "recovery_passes.h"
 #include "replicas.h"
 #include "super-io.h"
 #include "trace.h"
 
 #include <linux/random.h>
 
+const char * const bch2_btree_update_modes[] = {
+#define x(t) #t,
+       BCH_WATERMARKS()
+#undef x
+       NULL
+};
+
 static int bch2_btree_insert_node(struct btree_update *, struct btree_trans *,
                                  btree_path_idx_t, struct btree *, struct keylist *);
 static void bch2_btree_update_add_new_node(struct btree_update *, struct btree *);
@@ -44,56 +53,103 @@ static btree_path_idx_t get_unlocked_mut_path(struct btree_trans *trans,
        return path_idx;
 }
 
-/* Debug code: */
-
 /*
  * Verify that child nodes correctly span parent node's range:
  */
-static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
+int bch2_btree_node_check_topology(struct btree_trans *trans, struct btree *b)
 {
-#ifdef CONFIG_BCACHEFS_DEBUG
-       struct bpos next_node = b->data->min_key;
-       struct btree_node_iter iter;
+       struct bch_fs *c = trans->c;
+       struct bpos node_min = b->key.k.type == KEY_TYPE_btree_ptr_v2
+               ? bkey_i_to_btree_ptr_v2(&b->key)->v.min_key
+               : b->data->min_key;
+       struct btree_and_journal_iter iter;
        struct bkey_s_c k;
-       struct bkey_s_c_btree_ptr_v2 bp;
-       struct bkey unpacked;
-       struct printbuf buf1 = PRINTBUF, buf2 = PRINTBUF;
+       struct printbuf buf = PRINTBUF;
+       struct bkey_buf prev;
+       int ret = 0;
 
-       BUG_ON(!b->c.level);
+       BUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+              !bpos_eq(bkey_i_to_btree_ptr_v2(&b->key)->v.min_key,
+                       b->data->min_key));
 
-       if (!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
-               return;
+       if (!b->c.level)
+               return 0;
 
-       bch2_btree_node_iter_init_from_start(&iter, b);
+       bch2_bkey_buf_init(&prev);
+       bkey_init(&prev.k->k);
+       bch2_btree_and_journal_iter_init_node_iter(trans, &iter, b);
 
-       while (1) {
-               k = bch2_btree_node_iter_peek_unpack(&iter, b, &unpacked);
+       while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
                if (k.k->type != KEY_TYPE_btree_ptr_v2)
-                       break;
-               bp = bkey_s_c_to_btree_ptr_v2(k);
+                       goto out;
 
-               if (!bpos_eq(next_node, bp.v->min_key)) {
-                       bch2_dump_btree_node(c, b);
-                       bch2_bpos_to_text(&buf1, next_node);
-                       bch2_bpos_to_text(&buf2, bp.v->min_key);
-                       panic("expected next min_key %s got %s\n", buf1.buf, buf2.buf);
-               }
+               struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
 
-               bch2_btree_node_iter_advance(&iter, b);
+               struct bpos expected_min = bkey_deleted(&prev.k->k)
+                       ? node_min
+                       : bpos_successor(prev.k->k.p);
 
-               if (bch2_btree_node_iter_end(&iter)) {
-                       if (!bpos_eq(k.k->p, b->key.k.p)) {
-                               bch2_dump_btree_node(c, b);
-                               bch2_bpos_to_text(&buf1, b->key.k.p);
-                               bch2_bpos_to_text(&buf2, k.k->p);
-                               panic("expected end %s got %s\n", buf1.buf, buf2.buf);
-                       }
-                       break;
+               if (!bpos_eq(expected_min, bp.v->min_key)) {
+                       bch2_topology_error(c);
+
+                       printbuf_reset(&buf);
+                       prt_str(&buf, "end of prev node doesn't match start of next node\n"),
+                       prt_printf(&buf, "  in btree %s level %u node ",
+                                  bch2_btree_id_str(b->c.btree_id), b->c.level);
+                       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+                       prt_str(&buf, "\n  prev ");
+                       bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
+                       prt_str(&buf, "\n  next ");
+                       bch2_bkey_val_to_text(&buf, c, k);
+
+                       need_fsck_err(c, btree_node_topology_bad_min_key, "%s", buf.buf);
+                       goto topology_repair;
                }
 
-               next_node = bpos_successor(k.k->p);
+               bch2_bkey_buf_reassemble(&prev, c, k);
+               bch2_btree_and_journal_iter_advance(&iter);
+       }
+
+       if (bkey_deleted(&prev.k->k)) {
+               bch2_topology_error(c);
+
+               printbuf_reset(&buf);
+               prt_str(&buf, "empty interior node\n");
+               prt_printf(&buf, "  in btree %s level %u node ",
+                          bch2_btree_id_str(b->c.btree_id), b->c.level);
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+
+               need_fsck_err(c, btree_node_topology_empty_interior_node, "%s", buf.buf);
+               goto topology_repair;
+       } else if (!bpos_eq(prev.k->k.p, b->key.k.p)) {
+               bch2_topology_error(c);
+
+               printbuf_reset(&buf);
+               prt_str(&buf, "last child node doesn't end at end of parent node\n");
+               prt_printf(&buf, "  in btree %s level %u node ",
+                          bch2_btree_id_str(b->c.btree_id), b->c.level);
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
+               prt_str(&buf, "\n  last key ");
+               bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(prev.k));
+
+               need_fsck_err(c, btree_node_topology_bad_max_key, "%s", buf.buf);
+               goto topology_repair;
        }
-#endif
+out:
+fsck_err:
+       bch2_btree_and_journal_iter_exit(&iter);
+       bch2_bkey_buf_exit(&prev, c);
+       printbuf_exit(&buf);
+       return ret;
+topology_repair:
+       if ((c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_check_topology)) &&
+           c->curr_recovery_pass > BCH_RECOVERY_PASS_check_topology) {
+               bch2_inconsistent_error(c);
+               ret = -BCH_ERR_btree_need_topology_repair;
+       } else {
+               ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_check_topology);
+       }
+       goto out;
 }
 
 /* Calculate ideal packed bkey format for new btree nodes: */
@@ -254,7 +310,7 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
        struct open_buckets obs = { .nr = 0 };
        struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
        enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
-       unsigned nr_reserve = watermark > BCH_WATERMARK_reclaim
+       unsigned nr_reserve = watermark < BCH_WATERMARK_reclaim
                ? BTREE_NODE_RESERVE
                : 0;
        int ret;
@@ -638,7 +694,7 @@ static void btree_update_nodes_written(struct btree_update *as)
         * which may require allocations as well.
         */
        ret = commit_do(trans, &as->disk_res, &journal_seq,
-                       BCH_WATERMARK_reclaim|
+                       BCH_WATERMARK_interior_updates|
                        BCH_TRANS_COMMIT_no_enospc|
                        BCH_TRANS_COMMIT_no_check_rw|
                        BCH_TRANS_COMMIT_journal_reclaim,
@@ -797,11 +853,11 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
        mutex_lock(&c->btree_interior_update_lock);
        list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
 
-       BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+       BUG_ON(as->mode != BTREE_UPDATE_none);
        BUG_ON(!btree_node_dirty(b));
        BUG_ON(!b->c.level);
 
-       as->mode        = BTREE_INTERIOR_UPDATING_NODE;
+       as->mode        = BTREE_UPDATE_node;
        as->b           = b;
 
        set_btree_node_write_blocked(b);
@@ -824,7 +880,7 @@ static void btree_update_reparent(struct btree_update *as,
        lockdep_assert_held(&c->btree_interior_update_lock);
 
        child->b = NULL;
-       child->mode = BTREE_INTERIOR_UPDATING_AS;
+       child->mode = BTREE_UPDATE_update;
 
        bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal,
                              bch2_update_reparent_journal_pin_flush);
@@ -835,7 +891,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
        struct bkey_i *insert = &b->key;
        struct bch_fs *c = as->c;
 
-       BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+       BUG_ON(as->mode != BTREE_UPDATE_none);
 
        BUG_ON(as->journal_u64s + jset_u64s(insert->k.u64s) >
               ARRAY_SIZE(as->journal_entries));
@@ -849,7 +905,7 @@ static void btree_update_updated_root(struct btree_update *as, struct btree *b)
        mutex_lock(&c->btree_interior_update_lock);
        list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
 
-       as->mode        = BTREE_INTERIOR_UPDATING_ROOT;
+       as->mode        = BTREE_UPDATE_root;
        mutex_unlock(&c->btree_interior_update_lock);
 }
 
@@ -1027,7 +1083,7 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
        struct bch_fs *c = as->c;
        u64 start_time = as->start_time;
 
-       BUG_ON(as->mode == BTREE_INTERIOR_NO_UPDATE);
+       BUG_ON(as->mode == BTREE_UPDATE_none);
 
        if (as->took_gc_lock)
                up_read(&as->c->gc_lock);
@@ -1072,7 +1128,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
 
                if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-                   watermark != BCH_WATERMARK_reclaim)
+                   watermark < BCH_WATERMARK_reclaim)
                        journal_flags |= JOURNAL_RES_GET_NONBLOCK;
 
                ret = drop_locks_do(trans,
@@ -1123,7 +1179,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
        as->c           = c;
        as->start_time  = start_time;
        as->ip_started  = _RET_IP_;
-       as->mode        = BTREE_INTERIOR_NO_UPDATE;
+       as->mode        = BTREE_UPDATE_none;
+       as->watermark   = watermark;
        as->took_gc_lock = true;
        as->btree_id    = path->btree_id;
        as->update_level = update_level;
@@ -1168,7 +1225,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                 */
                if (bch2_err_matches(ret, ENOSPC) &&
                    (flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-                   watermark != BCH_WATERMARK_reclaim) {
+                   watermark < BCH_WATERMARK_reclaim) {
                        ret = -BCH_ERR_journal_reclaim_would_deadlock;
                        goto err;
                }
@@ -1380,9 +1437,16 @@ static void __btree_split_node(struct btree_update *as,
                if (bkey_deleted(k))
                        continue;
 
+               uk = bkey_unpack_key(b, k);
+
+               if (b->c.level &&
+                   u64s < n1_u64s &&
+                   u64s + k->u64s >= n1_u64s &&
+                   bch2_key_deleted_in_journal(trans, b->c.btree_id, b->c.level, uk.p))
+                       n1_u64s += k->u64s;
+
                i = u64s >= n1_u64s;
                u64s += k->u64s;
-               uk = bkey_unpack_key(b, k);
                if (!i)
                        n1_pos = uk.p;
                bch2_bkey_format_add_key(&format[i], &uk);
@@ -1441,8 +1505,7 @@ static void __btree_split_node(struct btree_update *as,
 
                bch2_verify_btree_nr_keys(n[i]);
 
-               if (b->c.level)
-                       btree_node_interior_verify(as->c, n[i]);
+               BUG_ON(bch2_btree_node_check_topology(trans, n[i]));
        }
 }
 
@@ -1473,7 +1536,7 @@ static void btree_split_insert_keys(struct btree_update *as,
 
                __bch2_btree_insert_keys_interior(as, trans, path, b, node_iter, keys);
 
-               btree_node_interior_verify(as->c, b);
+               BUG_ON(bch2_btree_node_check_topology(trans, b));
        }
 }
 
@@ -1488,9 +1551,14 @@ static int btree_split(struct btree_update *as, struct btree_trans *trans,
        u64 start_time = local_clock();
        int ret = 0;
 
+       bch2_verify_btree_nr_keys(b);
        BUG_ON(!parent && (b != btree_node_root(c, b)));
        BUG_ON(parent && !btree_node_intent_locked(trans->paths + path, b->c.level + 1));
 
+       ret = bch2_btree_node_check_topology(trans, b);
+       if (ret)
+               return ret;
+
        bch2_btree_interior_update_will_free_node(as, b);
 
        if (b->nr.live_u64s > BTREE_SPLIT_THRESHOLD(c)) {
@@ -1710,7 +1778,11 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
                goto split;
        }
 
-       btree_node_interior_verify(c, b);
+       ret = bch2_btree_node_check_topology(trans, b);
+       if (ret) {
+               bch2_btree_node_unlock_write(trans, path, b);
+               return ret;
+       }
 
        bch2_btree_insert_keys_interior(as, trans, path, b, keys);
 
@@ -1728,7 +1800,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
 
        bch2_btree_node_unlock_write(trans, path, b);
 
-       btree_node_interior_verify(c, b);
+       BUG_ON(bch2_btree_node_check_topology(trans, b));
        return 0;
 split:
        /*
@@ -1818,9 +1890,12 @@ int bch2_btree_increase_depth(struct btree_trans *trans, btree_path_idx_t path,
 {
        struct bch_fs *c = trans->c;
        struct btree *b = bch2_btree_id_root(c, trans->paths[path].btree_id)->b;
+
+       if (btree_node_fake(b))
+               return bch2_btree_split_leaf(trans, path, flags);
+
        struct btree_update *as =
-               bch2_btree_update_start(trans, trans->paths + path,
-                                       b->c.level, true, flags);
+               bch2_btree_update_start(trans, trans->paths + path, b->c.level, true, flags);
        if (IS_ERR(as))
                return PTR_ERR(as);
 
@@ -2391,7 +2466,7 @@ void bch2_btree_set_root_for_read(struct bch_fs *c, struct btree *b)
        bch2_btree_set_root_inmem(c, b);
 }
 
-static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
+static int __bch2_btree_root_alloc_fake(struct btree_trans *trans, enum btree_id id, unsigned level)
 {
        struct bch_fs *c = trans->c;
        struct closure cl;
@@ -2410,7 +2485,7 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
 
        set_btree_node_fake(b);
        set_btree_node_need_rewrite(b);
-       b->c.level      = 0;
+       b->c.level      = level;
        b->c.btree_id   = id;
 
        bkey_btree_ptr_init(&b->key);
@@ -2437,9 +2512,21 @@ static int __bch2_btree_root_alloc(struct btree_trans *trans, enum btree_id id)
        return 0;
 }
 
-void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id)
+void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level)
+{
+       bch2_trans_run(c, __bch2_btree_root_alloc_fake(trans, id, level));
+}
+
+static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as)
 {
-       bch2_trans_run(c, __bch2_btree_root_alloc(trans, id));
+       prt_printf(out, "%ps: btree=%s watermark=%s mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
+                  (void *) as->ip_started,
+                  bch2_btree_id_str(as->btree_id),
+                  bch2_watermarks[as->watermark],
+                  bch2_btree_update_modes[as->mode],
+                  as->nodes_written,
+                  closure_nr_remaining(&as->cl),
+                  as->journal.seq);
 }
 
 void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
@@ -2448,12 +2535,7 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)
 
        mutex_lock(&c->btree_interior_update_lock);
        list_for_each_entry(as, &c->btree_interior_update_list, list)
-               prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
-                          (void *) as->ip_started,
-                          as->mode,
-                          as->nodes_written,
-                          closure_nr_remaining(&as->cl),
-                          as->journal.seq);
+               bch2_btree_update_to_text(out, as);
        mutex_unlock(&c->btree_interior_update_lock);
 }
 
index f651dd48aaa0496b7cf7c80eb183896b1d418044..88dcf5a22a3bd628aaa22065f3cdd70ca3770d90 100644 (file)
 
 #define BTREE_UPDATE_JOURNAL_RES       (BTREE_UPDATE_NODES_MAX * (BKEY_BTREE_PTR_U64s_MAX + 1))
 
+int bch2_btree_node_check_topology(struct btree_trans *, struct btree *);
+
+#define BTREE_UPDATE_MODES()   \
+       x(none)                 \
+       x(node)                 \
+       x(root)                 \
+       x(update)
+
+enum btree_update_mode {
+#define x(n)   BTREE_UPDATE_##n,
+       BTREE_UPDATE_MODES()
+#undef x
+};
+
 /*
  * Tracks an in progress split/rewrite of a btree node and the update to the
  * parent node:
@@ -37,14 +51,8 @@ struct btree_update {
        struct list_head                list;
        struct list_head                unwritten_list;
 
-       /* What kind of update are we doing? */
-       enum {
-               BTREE_INTERIOR_NO_UPDATE,
-               BTREE_INTERIOR_UPDATING_NODE,
-               BTREE_INTERIOR_UPDATING_ROOT,
-               BTREE_INTERIOR_UPDATING_AS,
-       } mode;
-
+       enum btree_update_mode          mode;
+       enum bch_watermark              watermark;
        unsigned                        nodes_written:1;
        unsigned                        took_gc_lock:1;
 
@@ -54,7 +62,7 @@ struct btree_update {
        struct disk_reservation         disk_res;
 
        /*
-        * BTREE_INTERIOR_UPDATING_NODE:
+        * BTREE_UPDATE_node:
         * The update that made the new nodes visible was a regular update to an
         * existing interior node - @b. We can't write out the update to @b
         * until the new nodes we created are finished writing, so we block @b
@@ -163,7 +171,7 @@ int bch2_btree_node_update_key_get_iter(struct btree_trans *, struct btree *,
                                        struct bkey_i *, unsigned, bool);
 
 void bch2_btree_set_root_for_read(struct bch_fs *, struct btree *);
-void bch2_btree_root_alloc(struct bch_fs *, enum btree_id);
+void bch2_btree_root_alloc_fake(struct bch_fs *, enum btree_id, unsigned);
 
 static inline unsigned btree_update_reserve_required(struct bch_fs *c,
                                                     struct btree *b)
index 5cbad8445782c4006074365c13551b1c38b57849..baf63e2fddb64cd8f4c745d0cc80c864c86ffaa6 100644 (file)
@@ -11,6 +11,7 @@
 #include "journal_reclaim.h"
 
 #include <linux/prefetch.h>
+#include <linux/sort.h>
 
 static int bch2_btree_write_buffer_journal_flush(struct journal *,
                                struct journal_entry_pin *, u64);
@@ -46,6 +47,14 @@ static inline bool wb_key_ref_cmp(const struct wb_key_ref *l, const struct wb_ke
 #endif
 }
 
+static int wb_key_seq_cmp(const void *_l, const void *_r)
+{
+       const struct btree_write_buffered_key *l = _l;
+       const struct btree_write_buffered_key *r = _r;
+
+       return cmp_int(l->journal_seq, r->journal_seq);
+}
+
 /* Compare excluding idx, the low 24 bits: */
 static inline bool wb_key_eq(const void *_l, const void *_r)
 {
@@ -357,6 +366,11 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
                 */
                trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, wb->flushing.keys.nr);
 
+               sort(wb->flushing.keys.data,
+                    wb->flushing.keys.nr,
+                    sizeof(wb->flushing.keys.data[0]),
+                    wb_key_seq_cmp, NULL);
+
                darray_for_each(wb->flushing.keys, i) {
                        if (!i->journal_seq)
                                continue;
index 96edf2c34d433d1c1ad41ec8da0c77b8d40afe9f..941401a210f56993359548e51b5095d0db45e691 100644 (file)
@@ -525,6 +525,7 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
                        "different types of data in same bucket: %s, %s",
                        bch2_data_type_str(g->data_type),
                        bch2_data_type_str(data_type))) {
+               BUG();
                ret = -EIO;
                goto err;
        }
@@ -628,6 +629,7 @@ int bch2_check_bucket_ref(struct btree_trans *trans,
                        bch2_data_type_str(ptr_data_type),
                        (printbuf_reset(&buf),
                         bch2_bkey_val_to_text(&buf, c, k), buf.buf));
+               BUG();
                ret = -EIO;
                goto err;
        }
@@ -815,14 +817,14 @@ static int __mark_pointer(struct btree_trans *trans,
 static int bch2_trigger_pointer(struct btree_trans *trans,
                        enum btree_id btree_id, unsigned level,
                        struct bkey_s_c k, struct extent_ptr_decoded p,
-                       s64 *sectors,
-                       unsigned flags)
+                       const union bch_extent_entry *entry,
+                       s64 *sectors, unsigned flags)
 {
        bool insert = !(flags & BTREE_TRIGGER_OVERWRITE);
        struct bpos bucket;
        struct bch_backpointer bp;
 
-       bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, &bucket, &bp);
+       bch2_extent_ptr_to_bp(trans->c, btree_id, level, k, p, entry, &bucket, &bp);
        *sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len);
 
        if (flags & BTREE_TRIGGER_TRANSACTIONAL) {
@@ -851,7 +853,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
        if (flags & BTREE_TRIGGER_GC) {
                struct bch_fs *c = trans->c;
                struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-               enum bch_data_type data_type = bkey_ptr_data_type(btree_id, level, k, p);
+               enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry);
 
                percpu_down_read(&c->mark_lock);
                struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr);
@@ -979,7 +981,7 @@ static int __trigger_extent(struct btree_trans *trans,
 
        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                s64 disk_sectors;
-               ret = bch2_trigger_pointer(trans, btree_id, level, k, p, &disk_sectors, flags);
+               ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags);
                if (ret < 0)
                        return ret;
 
index 6387e039f7897534e27c207dd3818dc4b6afb3b7..00aaf4bb513974a6b9c0353ea9445f92671c32eb 100644 (file)
@@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma
                fallthrough;
        case BCH_WATERMARK_btree_copygc:
        case BCH_WATERMARK_reclaim:
+       case BCH_WATERMARK_interior_updates:
                break;
        }
 
index 38defa19d52d701762fa95a02cb1e22e7a0c182c..cbfa6459bdbceec6a953f91a385fc5e4fe76691d 100644 (file)
@@ -7,7 +7,7 @@
 #include "chardev.h"
 #include "journal.h"
 #include "move.h"
-#include "recovery.h"
+#include "recovery_passes.h"
 #include "replicas.h"
 #include "super.h"
 #include "super-io.h"
index 4150feca42a2e65e63a59234a3e806ebbd09e1ac..34731ee0217f62f6e43fb691e76083c46026b127 100644 (file)
@@ -14,6 +14,7 @@
 #include "move.h"
 #include "nocow_locking.h"
 #include "rebalance.h"
+#include "snapshot.h"
 #include "subvolume.h"
 #include "trace.h"
 
@@ -509,6 +510,14 @@ int bch2_data_update_init(struct btree_trans *trans,
        unsigned ptrs_locked = 0;
        int ret = 0;
 
+       /*
+        * fs is corrupt  we have a key for a snapshot node that doesn't exist,
+        * and we have to check for this because we go rw before repairing the
+        * snapshots table - just skip it, we can move it later.
+        */
+       if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot)))
+               return -BCH_ERR_data_update_done;
+
        bch2_bkey_buf_init(&m->k);
        bch2_bkey_buf_reassemble(&m->k, c, k);
        m->btree_id     = btree_id;
@@ -571,8 +580,7 @@ int bch2_data_update_init(struct btree_trans *trans,
                                move_ctxt_wait_event(ctxt,
                                                (locked = bch2_bucket_nocow_trylock(&c->nocow_locks,
                                                                          PTR_BUCKET_POS(c, &p.ptr), 0)) ||
-                                               (!atomic_read(&ctxt->read_sectors) &&
-                                                !atomic_read(&ctxt->write_sectors)));
+                                               list_empty(&ctxt->ios));
 
                                if (!locked)
                                        bch2_bucket_nocow_lock(&c->nocow_locks,
index af25d8ec60f221d9d935a0ef4ad7aef3641a9e3d..01a79fa3eacb211cb7cd779616f512d427102fd4 100644 (file)
        x(BCH_ERR_nopromote,            nopromote_in_flight)                    \
        x(BCH_ERR_nopromote,            nopromote_no_writes)                    \
        x(BCH_ERR_nopromote,            nopromote_enomem)                       \
-       x(0,                            need_inode_lock)
+       x(0,                            need_inode_lock)                        \
+       x(0,                            invalid_snapshot_node)
 
 enum bch_errcode {
        BCH_ERR_START           = 2048,
index 043431206799d80a6e3eab43bd635947fa48db9f..82a6656c941c5f16e6725d0bfbf66a3b63157e0f 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 #include "bcachefs.h"
 #include "error.h"
-#include "recovery.h"
+#include "journal.h"
+#include "recovery_passes.h"
 #include "super.h"
 #include "thread_with_file.h"
 
@@ -16,7 +17,8 @@ bool bch2_inconsistent_error(struct bch_fs *c)
                return false;
        case BCH_ON_ERROR_ro:
                if (bch2_fs_emergency_read_only(c))
-                       bch_err(c, "inconsistency detected - emergency read only");
+                       bch_err(c, "inconsistency detected - emergency read only at journal seq %llu",
+                               journal_cur_seq(&c->journal));
                return true;
        case BCH_ON_ERROR_panic:
                panic(bch2_fmt(c, "panic after error"));
index ae1d6674c512d44521379f21d5872b2b79993f57..36caedf72d89abbb74fd7e11757aa388b6c0a84a 100644 (file)
@@ -32,6 +32,12 @@ bool bch2_inconsistent_error(struct bch_fs *);
 
 int bch2_topology_error(struct bch_fs *);
 
+#define bch2_fs_topology_error(c, ...)                                 \
+({                                                                     \
+       bch_err(c, "btree topology error: " __VA_ARGS__);               \
+       bch2_topology_error(c);                                         \
+})
+
 #define bch2_fs_inconsistent(c, ...)                                   \
 ({                                                                     \
        bch_err(c, __VA_ARGS__);                                        \
index 61395b113df9bdad67c0da7d2a4cc4f99664bc4e..0e3ca99fbd2de1522c5e8dea8ac313232f60f7f3 100644 (file)
@@ -189,13 +189,18 @@ int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
                              enum bkey_invalid_flags flags,
                              struct printbuf *err)
 {
+       struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
        int ret = 0;
 
-       bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, c, err,
-                        btree_ptr_v2_val_too_big,
+       bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX,
+                        c, err, btree_ptr_v2_val_too_big,
                         "value too big (%zu > %zu)",
                         bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
 
+       bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p),
+                        c, err, btree_ptr_v2_min_key_bad,
+                        "min_key > key");
+
        ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
 fsck_err:
        return ret;
@@ -973,6 +978,31 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
        return bkey_deleted(k.k);
 }
 
+void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
+{
+       struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+               ? bch_dev_bkey_exists(c, ptr->dev)
+               : NULL;
+
+       if (!ca) {
+               prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+                          (u64) ptr->offset, ptr->gen,
+                          ptr->cached ? " cached" : "");
+       } else {
+               u32 offset;
+               u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+               prt_printf(out, "ptr: %u:%llu:%u gen %u",
+                          ptr->dev, b, offset, ptr->gen);
+               if (ptr->cached)
+                       prt_str(out, " cached");
+               if (ptr->unwritten)
+                       prt_str(out, " unwritten");
+               if (ca && ptr_stale(ca, ptr))
+                       prt_printf(out, " stale");
+       }
+}
+
 void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
                            struct bkey_s_c k)
 {
@@ -988,31 +1018,10 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
                        prt_printf(out, " ");
 
                switch (__extent_entry_type(entry)) {
-               case BCH_EXTENT_ENTRY_ptr: {
-                       const struct bch_extent_ptr *ptr = entry_to_ptr(entry);
-                       struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
-                               ? bch_dev_bkey_exists(c, ptr->dev)
-                               : NULL;
-
-                       if (!ca) {
-                               prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
-                                      (u64) ptr->offset, ptr->gen,
-                                      ptr->cached ? " cached" : "");
-                       } else {
-                               u32 offset;
-                               u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
-                               prt_printf(out, "ptr: %u:%llu:%u gen %u",
-                                          ptr->dev, b, offset, ptr->gen);
-                               if (ptr->cached)
-                                       prt_str(out, " cached");
-                               if (ptr->unwritten)
-                                       prt_str(out, " unwritten");
-                               if (ca && ptr_stale(ca, ptr))
-                                       prt_printf(out, " stale");
-                       }
+               case BCH_EXTENT_ENTRY_ptr:
+                       bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry));
                        break;
-               }
+
                case BCH_EXTENT_ENTRY_crc32:
                case BCH_EXTENT_ENTRY_crc64:
                case BCH_EXTENT_ENTRY_crc128: {
index fd2669cdd76f3b23861a9c0835253d3812a6de10..528e817eacbdad3a058eaf20c8a7526fd9dea3d9 100644 (file)
@@ -596,30 +596,6 @@ static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k)
        return ret;
 }
 
-static inline unsigned bch2_bkey_ptr_data_type(struct bkey_s_c k, const struct bch_extent_ptr *ptr)
-{
-       switch (k.k->type) {
-       case KEY_TYPE_btree_ptr:
-       case KEY_TYPE_btree_ptr_v2:
-               return BCH_DATA_btree;
-       case KEY_TYPE_extent:
-       case KEY_TYPE_reflink_v:
-               return BCH_DATA_user;
-       case KEY_TYPE_stripe: {
-               struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-
-               BUG_ON(ptr < s.v->ptrs ||
-                      ptr >= s.v->ptrs + s.v->nr_blocks);
-
-               return ptr >= s.v->ptrs + s.v->nr_blocks - s.v->nr_redundant
-                       ? BCH_DATA_parity
-                       : BCH_DATA_user;
-       }
-       default:
-               BUG();
-       }
-}
-
 unsigned bch2_bkey_nr_ptrs(struct bkey_s_c);
 unsigned bch2_bkey_nr_ptrs_allocated(struct bkey_s_c);
 unsigned bch2_bkey_nr_ptrs_fully_allocated(struct bkey_s_c);
@@ -700,6 +676,7 @@ bch2_extent_has_ptr(struct bkey_s_c, struct extent_ptr_decoded, struct bkey_s);
 void bch2_extent_ptr_set_cached(struct bkey_s, struct bch_extent_ptr *);
 
 bool bch2_extent_normalize(struct bch_fs *, struct bkey_s);
+void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *);
 void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *,
                            struct bkey_s_c);
 int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c,
diff --git a/fs/bcachefs/eytzinger.c b/fs/bcachefs/eytzinger.c
new file mode 100644 (file)
index 0000000..4ce5e95
--- /dev/null
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "eytzinger.h"
+
+/**
+ * is_aligned - is this pointer & size okay for word-wide copying?
+ * @base: pointer to data
+ * @size: size of each element
+ * @align: required alignment (typically 4 or 8)
+ *
+ * Returns true if elements can be copied using word loads and stores.
+ * The size must be a multiple of the alignment, and the base address must
+ * be if we do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
+ *
+ * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
+ * to "if ((a | b) & mask)", so we do that by hand.
+ */
+__attribute_const__ __always_inline
+static bool is_aligned(const void *base, size_t size, unsigned char align)
+{
+       unsigned char lsbits = (unsigned char)size;
+
+       (void)base;
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+       lsbits |= (unsigned char)(uintptr_t)base;
+#endif
+       return (lsbits & (align - 1)) == 0;
+}
+
+/**
+ * swap_words_32 - swap two elements in 32-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 4)
+ *
+ * Exchange the two objects in memory.  This exploits base+index addressing,
+ * which basically all CPUs have, to minimize loop overhead computations.
+ *
+ * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
+ * bottom of the loop, even though the zero flag is still valid from the
+ * subtract (since the intervening mov instructions don't alter the flags).
+ * Gcc 8.1.0 doesn't have that problem.
+ */
+static void swap_words_32(void *a, void *b, size_t n)
+{
+       do {
+               u32 t = *(u32 *)(a + (n -= 4));
+               *(u32 *)(a + n) = *(u32 *)(b + n);
+               *(u32 *)(b + n) = t;
+       } while (n);
+}
+
+/**
+ * swap_words_64 - swap two elements in 64-bit chunks
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size (must be a multiple of 8)
+ *
+ * Exchange the two objects in memory.  This exploits base+index
+ * addressing, which basically all CPUs have, to minimize loop overhead
+ * computations.
+ *
+ * We'd like to use 64-bit loads if possible.  If they're not, emulating
+ * one requires base+index+4 addressing which x86 has but most other
+ * processors do not.  If CONFIG_64BIT, we definitely have 64-bit loads,
+ * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
+ * x32 ABI).  Are there any cases the kernel needs to worry about?
+ */
+static void swap_words_64(void *a, void *b, size_t n)
+{
+       do {
+#ifdef CONFIG_64BIT
+               u64 t = *(u64 *)(a + (n -= 8));
+               *(u64 *)(a + n) = *(u64 *)(b + n);
+               *(u64 *)(b + n) = t;
+#else
+               /* Use two 32-bit transfers to avoid base+index+4 addressing */
+               u32 t = *(u32 *)(a + (n -= 4));
+               *(u32 *)(a + n) = *(u32 *)(b + n);
+               *(u32 *)(b + n) = t;
+
+               t = *(u32 *)(a + (n -= 4));
+               *(u32 *)(a + n) = *(u32 *)(b + n);
+               *(u32 *)(b + n) = t;
+#endif
+       } while (n);
+}
+
+/**
+ * swap_bytes - swap two elements a byte at a time
+ * @a: pointer to the first element to swap
+ * @b: pointer to the second element to swap
+ * @n: element size
+ *
+ * This is the fallback if alignment doesn't allow using larger chunks.
+ */
+static void swap_bytes(void *a, void *b, size_t n)
+{
+       do {
+               char t = ((char *)a)[--n];
+               ((char *)a)[n] = ((char *)b)[n];
+               ((char *)b)[n] = t;
+       } while (n);
+}
+
+/*
+ * The values are arbitrary as long as they can't be confused with
+ * a pointer, but small integers make for the smallest compare
+ * instructions.
+ */
+#define SWAP_WORDS_64 (swap_r_func_t)0
+#define SWAP_WORDS_32 (swap_r_func_t)1
+#define SWAP_BYTES    (swap_r_func_t)2
+#define SWAP_WRAPPER  (swap_r_func_t)3
+
+struct wrapper {
+       cmp_func_t cmp;
+       swap_func_t swap;
+};
+
+/*
+ * The function pointer is last to make tail calls most efficient if the
+ * compiler decides not to inline this function.
+ */
+static void do_swap(void *a, void *b, size_t size, swap_r_func_t swap_func, const void *priv)
+{
+       if (swap_func == SWAP_WRAPPER) {
+               ((const struct wrapper *)priv)->swap(a, b, (int)size);
+               return;
+       }
+
+       if (swap_func == SWAP_WORDS_64)
+               swap_words_64(a, b, size);
+       else if (swap_func == SWAP_WORDS_32)
+               swap_words_32(a, b, size);
+       else if (swap_func == SWAP_BYTES)
+               swap_bytes(a, b, size);
+       else
+               swap_func(a, b, (int)size, priv);
+}
+
+#define _CMP_WRAPPER ((cmp_r_func_t)0L)
+
+static int do_cmp(const void *a, const void *b, cmp_r_func_t cmp, const void *priv)
+{
+       if (cmp == _CMP_WRAPPER)
+               return ((const struct wrapper *)priv)->cmp(a, b);
+       return cmp(a, b, priv);
+}
+
+static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
+                        cmp_r_func_t cmp_func, const void *priv,
+                        size_t l, size_t r)
+{
+       return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
+                     base + inorder_to_eytzinger0(r, n) * size,
+                     cmp_func, priv);
+}
+
+static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
+                          swap_r_func_t swap_func, const void *priv,
+                          size_t l, size_t r)
+{
+       do_swap(base + inorder_to_eytzinger0(l, n) * size,
+               base + inorder_to_eytzinger0(r, n) * size,
+               size, swap_func, priv);
+}
+
+void eytzinger0_sort_r(void *base, size_t n, size_t size,
+                      cmp_r_func_t cmp_func,
+                      swap_r_func_t swap_func,
+                      const void *priv)
+{
+       int i, c, r;
+
+       /* called from 'sort' without swap function, let's pick the default */
+       if (swap_func == SWAP_WRAPPER && !((struct wrapper *)priv)->swap)
+               swap_func = NULL;
+
+       if (!swap_func) {
+               if (is_aligned(base, size, 8))
+                       swap_func = SWAP_WORDS_64;
+               else if (is_aligned(base, size, 4))
+                       swap_func = SWAP_WORDS_32;
+               else
+                       swap_func = SWAP_BYTES;
+       }
+
+       /* heapify */
+       for (i = n / 2 - 1; i >= 0; --i) {
+               for (r = i; r * 2 + 1 < n; r = c) {
+                       c = r * 2 + 1;
+
+                       if (c + 1 < n &&
+                           eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+                               c++;
+
+                       if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+                               break;
+
+                       eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+               }
+       }
+
+       /* sort */
+       for (i = n - 1; i > 0; --i) {
+               eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
+
+               for (r = 0; r * 2 + 1 < i; r = c) {
+                       c = r * 2 + 1;
+
+                       if (c + 1 < i &&
+                           eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
+                               c++;
+
+                       if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
+                               break;
+
+                       eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
+               }
+       }
+}
+
+void eytzinger0_sort(void *base, size_t n, size_t size,
+                    cmp_func_t cmp_func,
+                    swap_func_t swap_func)
+{
+       struct wrapper w = {
+               .cmp  = cmp_func,
+               .swap = swap_func,
+       };
+
+       return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
+}
index b04750dbf870bc78c95ece35d363e3a4c0936b50..ee0e2df33322d2dccb60e1ed90257863769ead0d 100644 (file)
@@ -5,23 +5,33 @@
 #include <linux/bitops.h>
 #include <linux/log2.h>
 
-#include "util.h"
+#ifdef EYTZINGER_DEBUG
+#define EYTZINGER_BUG_ON(cond)         BUG_ON(cond)
+#else
+#define EYTZINGER_BUG_ON(cond)
+#endif
 
 /*
  * Traversal for trees in eytzinger layout - a full binary tree layed out in an
- * array
- */
-
-/*
- * One based indexing version:
+ * array.
  *
- * With one based indexing each level of the tree starts at a power of two -
- * good for cacheline alignment:
+ * Consider using an eytzinger tree any time you would otherwise be doing binary
+ * search over an array. Binary search is a worst case scenario for branch
+ * prediction and prefetching, but in an eytzinger tree every node's children
+ * are adjacent in memory, thus we can prefetch children before knowing the
+ * result of the comparison, assuming multiple nodes fit on a cacheline.
+ *
+ * Two variants are provided, for one based indexing and zero based indexing.
+ *
+ * Zero based indexing is more convenient, but one based indexing has better
+ * alignment and thus better performance because each new level of the tree
+ * starts at a power of two, and thus if element 0 was cacheline aligned, each
+ * new level will be as well.
  */
 
 static inline unsigned eytzinger1_child(unsigned i, unsigned child)
 {
-       EBUG_ON(child > 1);
+       EYTZINGER_BUG_ON(child > 1);
 
        return (i << 1) + child;
 }
@@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size)
 
 static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 {
-       EBUG_ON(i > size);
+       EYTZINGER_BUG_ON(i > size);
 
        if (eytzinger1_right_child(i) <= size) {
                i = eytzinger1_right_child(i);
@@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)
 
 static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
 {
-       EBUG_ON(i > size);
+       EYTZINGER_BUG_ON(i > size);
 
        if (eytzinger1_left_child(i) <= size) {
                i = eytzinger1_left_child(i) + 1;
@@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
        unsigned shift = __fls(size) - b;
        int s;
 
-       EBUG_ON(!i || i > size);
+       EYTZINGER_BUG_ON(!i || i > size);
 
        i  ^= 1U << b;
        i <<= 1;
@@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
        unsigned shift;
        int s;
 
-       EBUG_ON(!i || i > size);
+       EYTZINGER_BUG_ON(!i || i > size);
 
        /*
         * sign bit trick:
@@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
 
 static inline unsigned eytzinger0_child(unsigned i, unsigned child)
 {
-       EBUG_ON(child > 1);
+       EYTZINGER_BUG_ON(child > 1);
 
        return (i << 1) + 1 + child;
 }
@@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
             (_i) != -1;                                \
             (_i) = eytzinger0_next((_i), (_size)))
 
-typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
-
 /* return greatest node <= @search, or -1 if not found */
 static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
-                                        eytzinger_cmp_fn cmp, const void *search)
+                                        cmp_func_t cmp, const void *search)
 {
        unsigned i, n = 0;
 
@@ -244,21 +252,24 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
 
        do {
                i = n;
-               n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
+               n = eytzinger0_child(i, cmp(base + i * size, search) <= 0);
        } while (n < nr);
 
        if (n & 1) {
                /* @i was greater than @search, return previous node: */
-
-               if (i == eytzinger0_first(nr))
-                       return -1;
-
                return eytzinger0_prev(i, nr);
        } else {
                return i;
        }
 }
 
+static inline ssize_t eytzinger0_find_gt(void *base, size_t nr, size_t size,
+                                        cmp_func_t cmp, const void *search)
+{
+       ssize_t idx = eytzinger0_find_le(base, nr, size, cmp, search);
+       return eytzinger0_next(idx, size);
+}
+
 #define eytzinger0_find(base, nr, size, _cmp, search)                  \
 ({                                                                     \
        void *_base             = (base);                               \
@@ -269,13 +280,13 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
        int _res;                                                       \
                                                                        \
        while (_i < _nr &&                                              \
-              (_res = _cmp(_search, _base + _i * _size, _size)))       \
+              (_res = _cmp(_search, _base + _i * _size)))              \
                _i = eytzinger0_child(_i, _res > 0);                    \
        _i;                                                             \
 })
 
-void eytzinger0_sort(void *, size_t, size_t,
-                   int (*cmp_func)(const void *, const void *, size_t),
-                   void (*swap_func)(void *, void *, size_t));
+void eytzinger0_sort_r(void *, size_t, size_t,
+                      cmp_r_func_t, swap_r_func_t, const void *);
+void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
 
 #endif /* _EYTZINGER_H */
index 33cb6da3a5ad28f2c014c2ef12408937933d49c3..f49e6c0f0f6835968202ab2f1fa194933945554a 100644 (file)
@@ -536,7 +536,7 @@ static __always_inline long bch2_dio_write_loop(struct dio_write *dio)
                if (likely(!dio->iter.count) || dio->op.error)
                        break;
 
-               bio_reset(bio, NULL, REQ_OP_WRITE);
+               bio_reset(bio, NULL, REQ_OP_WRITE | REQ_SYNC | REQ_IDLE);
        }
 out:
        return bch2_dio_write_done(dio);
@@ -618,7 +618,7 @@ ssize_t bch2_direct_write(struct kiocb *req, struct iov_iter *iter)
 
        bio = bio_alloc_bioset(NULL,
                               bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS),
-                              REQ_OP_WRITE,
+                              REQ_OP_WRITE | REQ_SYNC | REQ_IDLE,
                               GFP_KERNEL,
                               &c->dio_write_bioset);
        dio = container_of(bio, struct dio_write, op.wbio.bio);
index 0ccee05f6887b3b0aedf1e7c11f82665c7d38ba5..b5ea9fa1259d1462e9033318466fc914911672ce 100644 (file)
@@ -1997,6 +1997,7 @@ out:
        return dget(sb->s_root);
 
 err_put_super:
+       __bch2_fs_stop(c);
        deactivate_locked_super(sb);
        return ERR_PTR(bch2_err_class(ret));
 }
index 47d4eefaba7ba05dc1a610ddc35a27bb10891b5d..8e2010212cc371ddb53e651fb355ca376ee60f4b 100644 (file)
@@ -12,7 +12,7 @@
 #include "fsck.h"
 #include "inode.h"
 #include "keylist.h"
-#include "recovery.h"
+#include "recovery_passes.h"
 #include "snapshot.h"
 #include "super.h"
 #include "xattr.h"
@@ -63,9 +63,7 @@ static int subvol_lookup(struct btree_trans *trans, u32 subvol,
                         u32 *snapshot, u64 *inum)
 {
        struct bch_subvolume s;
-       int ret;
-
-       ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
+       int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);
 
        *snapshot = le32_to_cpu(s.snapshot);
        *inum = le64_to_cpu(s.inode);
@@ -158,9 +156,10 @@ static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
 
        bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT);
 
-       ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
-                                 &dir_hash_info, &iter,
-                                 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+       ret =   bch2_btree_iter_traverse(&iter) ?:
+               bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
+                                   &dir_hash_info, &iter,
+                                   BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
        bch2_trans_iter_exit(trans, &iter);
 err:
        bch_err_fn(c, ret);
@@ -169,7 +168,8 @@ err:
 
 /* Get lost+found, create if it doesn't exist: */
 static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
-                           struct bch_inode_unpacked *lostfound)
+                           struct bch_inode_unpacked *lostfound,
+                           u64 reattaching_inum)
 {
        struct bch_fs *c = trans->c;
        struct qstr lostfound_str = QSTR("lost+found");
@@ -184,19 +184,36 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
                return ret;
 
        subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
-       u32 subvol_snapshot;
 
-       ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol),
-                           &subvol_snapshot, &root_inum.inum);
-       bch_err_msg(c, ret, "looking up root subvol");
+       struct bch_subvolume subvol;
+       ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
+                                false, 0, &subvol);
+       bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
+                   le32_to_cpu(st.master_subvol), snapshot);
        if (ret)
                return ret;
 
+       if (!subvol.inode) {
+               struct btree_iter iter;
+               struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
+                               BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
+                               0, subvolume);
+               ret = PTR_ERR_OR_ZERO(subvol);
+               if (ret)
+                       return ret;
+
+               subvol->v.inode = cpu_to_le64(reattaching_inum);
+               bch2_trans_iter_exit(trans, &iter);
+       }
+
+       root_inum.inum = le64_to_cpu(subvol.inode);
+
        struct bch_inode_unpacked root_inode;
        struct bch_hash_info root_hash_info;
        u32 root_inode_snapshot = snapshot;
        ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
-       bch_err_msg(c, ret, "looking up root inode");
+       bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
+                   root_inum.inum, le32_to_cpu(st.master_subvol));
        if (ret)
                return ret;
 
@@ -292,7 +309,7 @@ static int reattach_inode(struct btree_trans *trans,
                snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
        }
 
-       ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
+       ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
        if (ret)
                return ret;
 
@@ -363,6 +380,112 @@ static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume
        return ret;
 }
 
+static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
+{
+       struct bch_fs *c = trans->c;
+
+       if (!bch2_snapshot_is_leaf(c, snapshotid)) {
+               bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
+               return -BCH_ERR_fsck_repair_unimplemented;
+       }
+
+       /*
+        * If inum isn't set, that means we're being called from check_dirents,
+        * not check_inodes - the root of this subvolume doesn't exist or we
+        * would have found it there:
+        */
+       if (!inum) {
+               struct btree_iter inode_iter = {};
+               struct bch_inode_unpacked new_inode;
+               u64 cpu = raw_smp_processor_id();
+
+               bch2_inode_init_early(c, &new_inode);
+               bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);
+
+               new_inode.bi_subvol = subvolid;
+
+               int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
+                         bch2_btree_iter_traverse(&inode_iter) ?:
+                         bch2_inode_write(trans, &inode_iter, &new_inode);
+               bch2_trans_iter_exit(trans, &inode_iter);
+               if (ret)
+                       return ret;
+
+               inum = new_inode.bi_inum;
+       }
+
+       bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);
+
+       struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
+       int ret = PTR_ERR_OR_ZERO(new_subvol);
+       if (ret)
+               return ret;
+
+       bkey_subvolume_init(&new_subvol->k_i);
+       new_subvol->k.p.offset  = subvolid;
+       new_subvol->v.snapshot  = cpu_to_le32(snapshotid);
+       new_subvol->v.inode     = cpu_to_le64(inum);
+       ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
+       if (ret)
+               return ret;
+
+       struct btree_iter iter;
+       struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
+                       BTREE_ID_snapshots, POS(0, snapshotid),
+                       0, snapshot);
+       ret = PTR_ERR_OR_ZERO(s);
+       bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
+       if (ret)
+               return ret;
+
+       u32 snapshot_tree = le32_to_cpu(s->v.tree);
+
+       s->v.subvol = cpu_to_le32(subvolid);
+       SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
+       bch2_trans_iter_exit(trans, &iter);
+
+       struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
+                       BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
+                       0, snapshot_tree);
+       ret = PTR_ERR_OR_ZERO(st);
+       bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
+       if (ret)
+               return ret;
+
+       if (!st->v.master_subvol)
+               st->v.master_subvol = cpu_to_le32(subvolid);
+
+       bch2_trans_iter_exit(trans, &iter);
+       return 0;
+}
+
+static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_inode_unpacked new_inode;
+
+       bch2_inode_init_early(c, &new_inode);
+       bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL);
+       new_inode.bi_size = size;
+       new_inode.bi_inum = inum;
+
+       return __bch2_fsck_write_inode(trans, &new_inode, snapshot);
+}
+
+static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum)
+{
+       struct btree_iter iter = {};
+
+       bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
+       struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter);
+       bch2_trans_iter_exit(trans, &iter);
+       int ret = bkey_err(k);
+       if (ret)
+               return ret;
+
+       return reconstruct_inode(trans, snapshot, inum, k.k->p.offset << 9, S_IFREG);
+}
+
 struct snapshots_seen_entry {
        u32                             id;
        u32                             equiv;
@@ -1064,6 +1187,11 @@ static int check_inode(struct btree_trans *trans,
                if (ret && !bch2_err_matches(ret, ENOENT))
                        goto err;
 
+               if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
+                       ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
+                       goto do_update;
+               }
+
                if (fsck_err_on(ret,
                                c, inode_bi_subvol_missing,
                                "inode %llu:%u bi_subvol points to missing subvolume %u",
@@ -1081,7 +1209,7 @@ static int check_inode(struct btree_trans *trans,
                        do_update = true;
                }
        }
-
+do_update:
        if (do_update) {
                ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
                bch_err_msg(c, ret, "in fsck updating inode");
@@ -1130,8 +1258,8 @@ static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_wal
                        i->count = count2;
 
                if (i->count != count2) {
-                       bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
-                               w->last_pos.inode, i->snapshot, i->count, count2);
+                       bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
+                                           w->last_pos.inode, i->snapshot, i->count, count2);
                        return -BCH_ERR_internal_fsck_err;
                }
 
@@ -1371,10 +1499,6 @@ static int check_overlapping_extents(struct btree_trans *trans,
                        goto err;
        }
 
-       ret = extent_ends_at(c, extent_ends, seen, k);
-       if (ret)
-               goto err;
-
        extent_ends->last_pos = k.k->p;
 err:
        return ret;
@@ -1438,6 +1562,17 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
                goto err;
 
        if (k.k->type != KEY_TYPE_whiteout) {
+               if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
+                       ret =   reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?:
+                               bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+                       if (ret)
+                               goto err;
+
+                       inode->last_pos.inode--;
+                       ret = -BCH_ERR_transaction_restart_nested;
+                       goto err;
+               }
+
                if (fsck_err_on(!i, c, extent_in_missing_inode,
                                "extent in missing inode:\n  %s",
                                (printbuf_reset(&buf),
@@ -1504,6 +1639,12 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
 
                i->seen_this_pos = true;
        }
+
+       if (k.k->type != KEY_TYPE_whiteout) {
+               ret = extent_ends_at(c, extent_ends, s, k);
+               if (ret)
+                       goto err;
+       }
 out:
 err:
 fsck_err:
@@ -1584,8 +1725,8 @@ static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_
                        return count2;
 
                if (i->count != count2) {
-                       bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu",
-                               i->count, count2);
+                       bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
+                                           w->last_pos.inode, i->snapshot, i->count, count2);
                        i->count = count2;
                        if (i->inode.bi_nlink == i->count)
                                continue;
@@ -1782,6 +1923,7 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
        u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol);
        u32 target_subvol = le32_to_cpu(d.v->d_child_subvol);
        u32 parent_snapshot;
+       u32 new_parent_subvol = 0;
        u64 parent_inum;
        struct printbuf buf = PRINTBUF;
        int ret = 0;
@@ -1790,6 +1932,27 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
        if (ret && !bch2_err_matches(ret, ENOENT))
                return ret;
 
+       if (ret ||
+           (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) {
+               int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
+               if (ret2 && !bch2_err_matches(ret, ENOENT))
+                       return ret2;
+       }
+
+       if (ret &&
+           !new_parent_subvol &&
+           (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
+               /*
+                * Couldn't find a subvol for dirent's snapshot - but we lost
+                * subvols, so we need to reconstruct:
+                */
+               ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0);
+               if (ret)
+                       return ret;
+
+               parent_snapshot = d.k->p.snapshot;
+       }
+
        if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol,
                        "dirent parent_subvol points to missing subvolume\n%s",
                        (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) ||
@@ -1798,10 +1961,10 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
                        "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s",
                        parent_snapshot,
                        (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
-               u32 new_parent_subvol;
-               ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol);
-               if (ret)
-                       goto err;
+               if (!new_parent_subvol) {
+                       bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot);
+                       return -BCH_ERR_fsck_repair_unimplemented;
+               }
 
                struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent);
                ret = PTR_ERR_OR_ZERO(new_dirent);
@@ -1847,9 +2010,16 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
 
        ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
        if (ret && !bch2_err_matches(ret, ENOENT))
-               return ret;
+               goto err;
+
+       if (ret) {
+               bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
+               ret = -BCH_ERR_fsck_repair_unimplemented;
+               ret = 0;
+               goto err;
+       }
 
-       if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol,
+       if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol,
                        c, inode_bi_parent_wrong,
                        "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
                        target_inum,
@@ -1857,13 +2027,13 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *
                subvol_root.bi_parent_subvol = parent_subvol;
                ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
                if (ret)
-                       return ret;
+                       goto err;
        }
 
        ret = check_dirent_target(trans, iter, d, &subvol_root,
                                  target_snapshot);
        if (ret)
-               return ret;
+               goto err;
 out:
 err:
 fsck_err:
@@ -1880,7 +2050,6 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
                        struct snapshots_seen *s)
 {
        struct bch_fs *c = trans->c;
-       struct bkey_s_c_dirent d;
        struct inode_walker_entry *i;
        struct printbuf buf = PRINTBUF;
        struct bpos equiv;
@@ -1919,6 +2088,17 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
                *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode);
        dir->first_this_inode = false;
 
+       if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
+               ret =   reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?:
+                       bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+               if (ret)
+                       goto err;
+
+               dir->last_pos.inode--;
+               ret = -BCH_ERR_transaction_restart_nested;
+               goto err;
+       }
+
        if (fsck_err_on(!i, c, dirent_in_missing_dir_inode,
                        "dirent in nonexisting directory:\n%s",
                        (printbuf_reset(&buf),
@@ -1953,7 +2133,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
        if (k.k->type != KEY_TYPE_dirent)
                goto out;
 
-       d = bkey_s_c_to_dirent(k);
+       struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);
 
        if (d.v->d_type == DT_SUBVOL) {
                ret = check_dirent_to_subvol(trans, iter, d);
@@ -2098,17 +2278,21 @@ static int check_root_trans(struct btree_trans *trans)
 
        if (mustfix_fsck_err_on(ret, c, root_subvol_missing,
                                "root subvol missing")) {
-               struct bkey_i_subvolume root_subvol;
+               struct bkey_i_subvolume *root_subvol =
+                       bch2_trans_kmalloc(trans, sizeof(*root_subvol));
+               ret = PTR_ERR_OR_ZERO(root_subvol);
+               if (ret)
+                       goto err;
 
                snapshot        = U32_MAX;
                inum            = BCACHEFS_ROOT_INO;
 
-               bkey_subvolume_init(&root_subvol.k_i);
-               root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL;
-               root_subvol.v.flags     = 0;
-               root_subvol.v.snapshot  = cpu_to_le32(snapshot);
-               root_subvol.v.inode     = cpu_to_le64(inum);
-               ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0);
+               bkey_subvolume_init(&root_subvol->k_i);
+               root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL;
+               root_subvol->v.flags    = 0;
+               root_subvol->v.snapshot = cpu_to_le32(snapshot);
+               root_subvol->v.inode    = cpu_to_le64(inum);
+               ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0);
                bch_err_msg(c, ret, "writing root subvol");
                if (ret)
                        goto err;
index 2b5e06770ab39ea0844342d6298b5ab37f26667d..ca4a066e9a5428aa68f88d77f59e9c365a580d6c 100644 (file)
@@ -552,8 +552,8 @@ static void __bch2_inode_unpacked_to_text(struct printbuf *out,
        prt_printf(out, "bi_sectors=%llu", inode->bi_sectors);
        prt_newline(out);
 
-       prt_newline(out);
        prt_printf(out, "bi_version=%llu", inode->bi_version);
+       prt_newline(out);
 
 #define x(_name, _bits)                                                \
        prt_printf(out, #_name "=%llu", (u64) inode->_name);    \
index 1baf78594ccaf85d7d89fea4fc938a7f700d6dc0..82f9170dab3fdcdbd422a9d9ced80db803fe6af4 100644 (file)
@@ -264,6 +264,7 @@ static int __bch2_resume_logged_op_truncate(struct btree_trans *trans,
                ret = 0;
 err:
        bch2_logged_op_finish(trans, op_k);
+       bch_err_fn(c, ret);
        return ret;
 }
 
@@ -476,6 +477,7 @@ case LOGGED_OP_FINSERT_finish:
        break;
        }
 err:
+       bch_err_fn(c, ret);
        bch2_logged_op_finish(trans, op_k);
        bch2_trans_iter_exit(trans, &iter);
        return ret;
index b5303874fc35b33e5e6ac3878a03af8ab1a882be..37a024e034d4953dd1ecc3e813112468722b4595 100644 (file)
@@ -95,8 +95,7 @@ out:
        return ret ?: bch2_blacklist_table_initialize(c);
 }
 
-static int journal_seq_blacklist_table_cmp(const void *_l,
-                                          const void *_r, size_t size)
+static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
 {
        const struct journal_seq_blacklist_table_entry *l = _l;
        const struct journal_seq_blacklist_table_entry *r = _r;
index 9fac838d123e8e40fb836d895afccde634f2d54c..b82f8209041ffb47506d7971382d614cdda9005b 100644 (file)
@@ -37,7 +37,6 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
        const struct bch_logged_op_fn *fn = logged_op_fn(k.k->type);
        struct bkey_buf sk;
        u32 restart_count = trans->restart_count;
-       int ret;
 
        if (!fn)
                return 0;
@@ -45,11 +44,11 @@ static int resume_logged_op(struct btree_trans *trans, struct btree_iter *iter,
        bch2_bkey_buf_init(&sk);
        bch2_bkey_buf_reassemble(&sk, c, k);
 
-       ret =   drop_locks_do(trans, (bch2_fs_lazy_rw(c), 0)) ?:
-               fn->resume(trans, sk.k) ?: trans_was_restarted(trans, restart_count);
+       fn->resume(trans, sk.k);
 
        bch2_bkey_buf_exit(&sk, c);
-       return ret;
+
+       return trans_was_restarted(trans, restart_count);
 }
 
 int bch2_resume_logged_ops(struct bch_fs *c)
index db63b3f3b338ad6405ceb34c4526a52765cca7af..4c298e74723db3023b9120cf1f823e46bfbaec4c 100644 (file)
@@ -136,20 +136,8 @@ static void mean_and_variance_test_1(struct kunit *test)
                        d, mean, stddev, weighted_mean, weighted_stddev);
 }
 
-static void mean_and_variance_test_2(struct kunit *test)
-{
-       s64 d[]                 = { 100, 10, 10, 10, 10, 10, 10 };
-       s64 mean[]              = {  10, 10, 10, 10, 10, 10, 10 };
-       s64 stddev[]            = {   9,  9,  9,  9,  9,  9,  9 };
-       s64 weighted_mean[]     = {  32, 27, 22, 19, 17, 15, 14 };
-       s64 weighted_stddev[]   = {  38, 35, 31, 27, 24, 21, 18 };
-
-       do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-                       d, mean, stddev, weighted_mean, weighted_stddev);
-}
-
 /* Test behaviour where we switch from one steady state to another: */
-static void mean_and_variance_test_3(struct kunit *test)
+static void mean_and_variance_test_2(struct kunit *test)
 {
        s64 d[]                 = { 100, 100, 100, 100, 100 };
        s64 mean[]              = {  22,  32,  40,  46,  50 };
@@ -161,18 +149,6 @@ static void mean_and_variance_test_3(struct kunit *test)
                        d, mean, stddev, weighted_mean, weighted_stddev);
 }
 
-static void mean_and_variance_test_4(struct kunit *test)
-{
-       s64 d[]                 = { 100, 100, 100, 100, 100 };
-       s64 mean[]              = {  10,  11,  12,  13,  14 };
-       s64 stddev[]            = {   9,  13,  15,  17,  19 };
-       s64 weighted_mean[]     = {  32,  49,  61,  71,  78 };
-       s64 weighted_stddev[]   = {  38,  44,  44,  41,  38 };
-
-       do_mean_and_variance_test(test, 10, 6, ARRAY_SIZE(d), 2,
-                       d, mean, stddev, weighted_mean, weighted_stddev);
-}
-
 static void mean_and_variance_fast_divpow2(struct kunit *test)
 {
        s64 i;
@@ -230,8 +206,6 @@ static struct kunit_case mean_and_variance_test_cases[] = {
        KUNIT_CASE(mean_and_variance_weighted_advanced_test),
        KUNIT_CASE(mean_and_variance_test_1),
        KUNIT_CASE(mean_and_variance_test_2),
-       KUNIT_CASE(mean_and_variance_test_3),
-       KUNIT_CASE(mean_and_variance_test_4),
        {}
 };
 
index 08ea0cfc4aef08acfd4d0fe33e0d8227f212cb02..e1800c4119b5fbaf8ebbfcdaef996e1dd9c35ca8 100644 (file)
@@ -7,6 +7,7 @@
 #include "disk_groups.h"
 #include "error.h"
 #include "opts.h"
+#include "recovery_passes.h"
 #include "super-io.h"
 #include "util.h"
 
@@ -205,6 +206,9 @@ const struct bch_option bch2_opt_table[] = {
 #define OPT_STR(_choices)      .type = BCH_OPT_STR,                    \
                                .min = 0, .max = ARRAY_SIZE(_choices),  \
                                .choices = _choices
+#define OPT_STR_NOLIMIT(_choices)      .type = BCH_OPT_STR,            \
+                               .min = 0, .max = U64_MAX,               \
+                               .choices = _choices
 #define OPT_FN(_fn)            .type = BCH_OPT_FN, .fn = _fn
 
 #define x(_name, _bits, _flags, _type, _sb_opt, _default, _hint, _help)        \
index 136083c11f3a3aecc575501c33c0b3868f38113f..1ac4135cca1c3dccc71a75a0d062ee30df33111c 100644 (file)
@@ -362,12 +362,17 @@ enum fsck_err_opts {
          OPT_FS|OPT_MOUNT,                                             \
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,               false,                          \
-         NULL,         "Don't replay the journal")                     \
-       x(keep_journal,                 u8,                             \
+         NULL,         "Exit recovery immediately prior to journal replay")\
+       x(recovery_pass_last,           u8,                             \
+         OPT_FS|OPT_MOUNT,                                             \
+         OPT_STR_NOLIMIT(bch2_recovery_passes),                        \
+         BCH2_NO_SB_OPT,               0,                              \
+         NULL,         "Exit recovery after specified pass")           \
+       x(retain_recovery_info,         u8,                             \
          0,                                                            \
          OPT_BOOL(),                                                   \
          BCH2_NO_SB_OPT,               false,                          \
-         NULL,         "Don't free journal entries/keys after startup")\
+         NULL,         "Don't free journal entries/keys, scanned btree nodes after startup")\
        x(read_entire_journal,          u8,                             \
          0,                                                            \
          OPT_BOOL(),                                                   \
index 03f9d6afe467889b02a483561277b0d539a836f5..b76c16152579c6d3e5a51dbf54c839392c0ce0b2 100644 (file)
@@ -1,35 +1,31 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
-#include "backpointers.h"
-#include "bkey_buf.h"
 #include "alloc_background.h"
-#include "btree_gc.h"
+#include "bkey_buf.h"
 #include "btree_journal_iter.h"
+#include "btree_node_scan.h"
 #include "btree_update.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
 #include "buckets.h"
 #include "dirent.h"
-#include "ec.h"
 #include "errcode.h"
 #include "error.h"
 #include "fs-common.h"
-#include "fsck.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
 #include "journal_seq_blacklist.h"
-#include "lru.h"
 #include "logged_ops.h"
 #include "move.h"
 #include "quota.h"
 #include "rebalance.h"
 #include "recovery.h"
+#include "recovery_passes.h"
 #include "replicas.h"
 #include "sb-clean.h"
 #include "sb-downgrade.h"
 #include "snapshot.h"
-#include "subvolume.h"
 #include "super-io.h"
 
 #include <linux/sort.h>
 
 #define QSTR(n) { { { .len = strlen(n) } }, .name = n }
 
+void bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
+{
+       u64 b = BIT_ULL(btree);
+
+       if (!(c->sb.btrees_lost_data & b)) {
+               bch_err(c, "flagging btree %s lost data", bch2_btree_id_str(btree));
+
+               mutex_lock(&c->sb_lock);
+               bch2_sb_field_get(c->disk_sb.sb, ext)->btrees_lost_data |= cpu_to_le64(b);
+               bch2_write_super(c);
+               mutex_unlock(&c->sb_lock);
+       }
+}
+
 static bool btree_id_is_alloc(enum btree_id id)
 {
        switch (id) {
@@ -52,7 +62,7 @@ static bool btree_id_is_alloc(enum btree_id id)
 }
 
 /* for -o reconstruct_alloc: */
-static void do_reconstruct_alloc(struct bch_fs *c)
+static void bch2_reconstruct_alloc(struct bch_fs *c)
 {
        bch2_journal_log_msg(c, "dropping alloc info");
        bch_info(c, "dropping and reconstructing all alloc info");
@@ -87,15 +97,17 @@ static void do_reconstruct_alloc(struct bch_fs *c)
 
        c->recovery_passes_explicit |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
 
-       struct journal_keys *keys = &c->journal_keys;
-       size_t src, dst;
 
-       move_gap(keys, keys->nr);
-
-       for (src = 0, dst = 0; src < keys->nr; src++)
-               if (!btree_id_is_alloc(keys->data[src].btree_id))
-                       keys->data[dst++] = keys->data[src];
-       keys->nr = keys->gap = dst;
+       bch2_shoot_down_journal_keys(c, BTREE_ID_alloc,
+                                    0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+       bch2_shoot_down_journal_keys(c, BTREE_ID_backpointers,
+                                    0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+       bch2_shoot_down_journal_keys(c, BTREE_ID_need_discard,
+                                    0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+       bch2_shoot_down_journal_keys(c, BTREE_ID_freespace,
+                                    0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+       bch2_shoot_down_journal_keys(c, BTREE_ID_bucket_gens,
+                                    0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
 }
 
 /*
@@ -186,7 +198,7 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r)
        return cmp_int(l->journal_seq, r->journal_seq);
 }
 
-static int bch2_journal_replay(struct bch_fs *c)
+int bch2_journal_replay(struct bch_fs *c)
 {
        struct journal_keys *keys = &c->journal_keys;
        DARRAY(struct journal_key *) keys_sorted = { 0 };
@@ -194,6 +206,7 @@ static int bch2_journal_replay(struct bch_fs *c)
        u64 start_seq   = c->journal_replay_seq_start;
        u64 end_seq     = c->journal_replay_seq_start;
        struct btree_trans *trans = bch2_trans_get(c);
+       bool immediate_flush = false;
        int ret = 0;
 
        if (keys->nr) {
@@ -215,6 +228,13 @@ static int bch2_journal_replay(struct bch_fs *c)
        darray_for_each(*keys, k) {
                cond_resched();
 
+               /*
+                * k->allocated means the key wasn't read in from the journal,
+                * rather it was from early repair code
+                */
+               if (k->allocated)
+                       immediate_flush = true;
+
                /* Skip fastpath if we're low on space in the journal */
                ret = c->journal.watermark ? -1 :
                        commit_do(trans, NULL, NULL,
@@ -266,7 +286,8 @@ static int bch2_journal_replay(struct bch_fs *c)
        bch2_trans_put(trans);
        trans = NULL;
 
-       if (!c->opts.keep_journal)
+       if (!c->opts.retain_recovery_info &&
+           c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
                bch2_journal_keys_put_initial(c);
 
        replay_now_at(j, j->replay_journal_seq_end);
@@ -274,6 +295,12 @@ static int bch2_journal_replay(struct bch_fs *c)
 
        bch2_journal_set_replay_done(j);
 
+       /* if we did any repair, flush it immediately */
+       if (immediate_flush) {
+               bch2_journal_flush_all_pins(&c->journal);
+               ret = bch2_journal_meta(&c->journal);
+       }
+
        if (keys->nr)
                bch2_journal_log_msg(c, "journal replay finished");
 err:
@@ -423,10 +450,9 @@ static int journal_replay_early(struct bch_fs *c,
 
 static int read_btree_roots(struct bch_fs *c)
 {
-       unsigned i;
        int ret = 0;
 
-       for (i = 0; i < btree_id_nr_alive(c); i++) {
+       for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
                struct btree_root *r = bch2_btree_id_root(c, i);
 
                if (!r->alive)
@@ -435,186 +461,46 @@ static int read_btree_roots(struct bch_fs *c)
                if (btree_id_is_alloc(i) && c->opts.reconstruct_alloc)
                        continue;
 
-               if (r->error) {
-                       __fsck_err(c,
-                                  btree_id_is_alloc(i)
-                                  ? FSCK_CAN_IGNORE : 0,
-                                  btree_root_bkey_invalid,
-                                  "invalid btree root %s",
-                                  bch2_btree_id_str(i));
-                       if (i == BTREE_ID_alloc)
+               if (mustfix_fsck_err_on((ret = r->error),
+                                       c, btree_root_bkey_invalid,
+                                       "invalid btree root %s",
+                                       bch2_btree_id_str(i)) ||
+                   mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)),
+                                       c, btree_root_read_error,
+                                       "error reading btree root %s l=%u: %s",
+                                       bch2_btree_id_str(i), r->level, bch2_err_str(ret))) {
+                       if (btree_id_is_alloc(i)) {
+                               c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_allocations);
+                               c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_info);
+                               c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_lrus);
+                               c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers);
+                               c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_alloc_to_lru_refs);
                                c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
-               }
+                               r->error = 0;
+                       } else if (!(c->recovery_passes_explicit & BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes))) {
+                               bch_info(c, "will run btree node scan");
+                               c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_scan_for_btree_nodes);
+                               c->recovery_passes_explicit |= BIT_ULL(BCH_RECOVERY_PASS_check_topology);
+                       }
 
-               ret = bch2_btree_root_read(c, i, &r->key, r->level);
-               if (ret) {
-                       fsck_err(c,
-                                btree_root_read_error,
-                                "error reading btree root %s",
-                                bch2_btree_id_str(i));
-                       if (btree_id_is_alloc(i))
-                               c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
                        ret = 0;
+                       bch2_btree_lost_data(c, i);
                }
        }
 
-       for (i = 0; i < BTREE_ID_NR; i++) {
+       for (unsigned i = 0; i < BTREE_ID_NR; i++) {
                struct btree_root *r = bch2_btree_id_root(c, i);
 
-               if (!r->b) {
+               if (!r->b && !r->error) {
                        r->alive = false;
                        r->level = 0;
-                       bch2_btree_root_alloc(c, i);
+                       bch2_btree_root_alloc_fake(c, i, 0);
                }
        }
 fsck_err:
        return ret;
 }
 
-static int bch2_initialize_subvolumes(struct bch_fs *c)
-{
-       struct bkey_i_snapshot_tree     root_tree;
-       struct bkey_i_snapshot          root_snapshot;
-       struct bkey_i_subvolume         root_volume;
-       int ret;
-
-       bkey_snapshot_tree_init(&root_tree.k_i);
-       root_tree.k.p.offset            = 1;
-       root_tree.v.master_subvol       = cpu_to_le32(1);
-       root_tree.v.root_snapshot       = cpu_to_le32(U32_MAX);
-
-       bkey_snapshot_init(&root_snapshot.k_i);
-       root_snapshot.k.p.offset = U32_MAX;
-       root_snapshot.v.flags   = 0;
-       root_snapshot.v.parent  = 0;
-       root_snapshot.v.subvol  = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
-       root_snapshot.v.tree    = cpu_to_le32(1);
-       SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
-
-       bkey_subvolume_init(&root_volume.k_i);
-       root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
-       root_volume.v.flags     = 0;
-       root_volume.v.snapshot  = cpu_to_le32(U32_MAX);
-       root_volume.v.inode     = cpu_to_le64(BCACHEFS_ROOT_INO);
-
-       ret =   bch2_btree_insert(c, BTREE_ID_snapshot_trees,   &root_tree.k_i, NULL, 0) ?:
-               bch2_btree_insert(c, BTREE_ID_snapshots,        &root_snapshot.k_i, NULL, 0) ?:
-               bch2_btree_insert(c, BTREE_ID_subvolumes,       &root_volume.k_i, NULL, 0);
-       bch_err_fn(c, ret);
-       return ret;
-}
-
-static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
-{
-       struct btree_iter iter;
-       struct bkey_s_c k;
-       struct bch_inode_unpacked inode;
-       int ret;
-
-       k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
-                              SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
-       ret = bkey_err(k);
-       if (ret)
-               return ret;
-
-       if (!bkey_is_inode(k.k)) {
-               bch_err(trans->c, "root inode not found");
-               ret = -BCH_ERR_ENOENT_inode;
-               goto err;
-       }
-
-       ret = bch2_inode_unpack(k, &inode);
-       BUG_ON(ret);
-
-       inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
-
-       ret = bch2_inode_write(trans, &iter, &inode);
-err:
-       bch2_trans_iter_exit(trans, &iter);
-       return ret;
-}
-
-/* set bi_subvol on root inode */
-noinline_for_stack
-static int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
-{
-       int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
-                               __bch2_fs_upgrade_for_subvolumes(trans));
-       bch_err_fn(c, ret);
-       return ret;
-}
-
-const char * const bch2_recovery_passes[] = {
-#define x(_fn, ...)    #_fn,
-       BCH_RECOVERY_PASSES()
-#undef x
-       NULL
-};
-
-static int bch2_check_allocations(struct bch_fs *c)
-{
-       return bch2_gc(c, true, c->opts.norecovery);
-}
-
-static int bch2_set_may_go_rw(struct bch_fs *c)
-{
-       struct journal_keys *keys = &c->journal_keys;
-
-       /*
-        * After we go RW, the journal keys buffer can't be modified (except for
-        * setting journal_key->overwritten: it will be accessed by multiple
-        * threads
-        */
-       move_gap(keys, keys->nr);
-
-       set_bit(BCH_FS_may_go_rw, &c->flags);
-
-       if (keys->nr || c->opts.fsck || !c->sb.clean)
-               return bch2_fs_read_write_early(c);
-       return 0;
-}
-
-struct recovery_pass_fn {
-       int             (*fn)(struct bch_fs *);
-       unsigned        when;
-};
-
-static struct recovery_pass_fn recovery_pass_fns[] = {
-#define x(_fn, _id, _when)     { .fn = bch2_##_fn, .when = _when },
-       BCH_RECOVERY_PASSES()
-#undef x
-};
-
-u64 bch2_recovery_passes_to_stable(u64 v)
-{
-       static const u8 map[] = {
-#define x(n, id, ...)  [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
-       BCH_RECOVERY_PASSES()
-#undef x
-       };
-
-       u64 ret = 0;
-       for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
-               if (v & BIT_ULL(i))
-                       ret |= BIT_ULL(map[i]);
-       return ret;
-}
-
-u64 bch2_recovery_passes_from_stable(u64 v)
-{
-       static const u8 map[] = {
-#define x(n, id, ...)  [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
-       BCH_RECOVERY_PASSES()
-#undef x
-       };
-
-       u64 ret = 0;
-       for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
-               if (v & BIT_ULL(i))
-                       ret |= BIT_ULL(map[i]);
-       return ret;
-}
-
 static bool check_version_upgrade(struct bch_fs *c)
 {
        unsigned latest_version = bcachefs_metadata_version_current;
@@ -687,96 +573,6 @@ static bool check_version_upgrade(struct bch_fs *c)
        return false;
 }
 
-u64 bch2_fsck_recovery_passes(void)
-{
-       u64 ret = 0;
-
-       for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
-               if (recovery_pass_fns[i].when & PASS_FSCK)
-                       ret |= BIT_ULL(i);
-       return ret;
-}
-
-static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
-       struct recovery_pass_fn *p = recovery_pass_fns + pass;
-
-       if (c->opts.norecovery && pass > BCH_RECOVERY_PASS_snapshots_read)
-               return false;
-       if (c->recovery_passes_explicit & BIT_ULL(pass))
-               return true;
-       if ((p->when & PASS_FSCK) && c->opts.fsck)
-               return true;
-       if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
-               return true;
-       if (p->when & PASS_ALWAYS)
-               return true;
-       return false;
-}
-
-static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
-{
-       struct recovery_pass_fn *p = recovery_pass_fns + pass;
-       int ret;
-
-       if (!(p->when & PASS_SILENT))
-               bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
-                          bch2_recovery_passes[pass]);
-       ret = p->fn(c);
-       if (ret)
-               return ret;
-       if (!(p->when & PASS_SILENT))
-               bch2_print(c, KERN_CONT " done\n");
-
-       return 0;
-}
-
-static int bch2_run_recovery_passes(struct bch_fs *c)
-{
-       int ret = 0;
-
-       while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
-               if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
-                       unsigned pass = c->curr_recovery_pass;
-
-                       ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
-                       if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
-                           (ret && c->curr_recovery_pass < pass))
-                               continue;
-                       if (ret)
-                               break;
-
-                       c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
-               }
-               c->curr_recovery_pass++;
-               c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
-       }
-
-       return ret;
-}
-
-int bch2_run_online_recovery_passes(struct bch_fs *c)
-{
-       int ret = 0;
-
-       for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
-               struct recovery_pass_fn *p = recovery_pass_fns + i;
-
-               if (!(p->when & PASS_ONLINE))
-                       continue;
-
-               ret = bch2_run_recovery_pass(c, i);
-               if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
-                       i = c->curr_recovery_pass;
-                       continue;
-               }
-               if (ret)
-                       break;
-       }
-
-       return ret;
-}
-
 int bch2_fs_recovery(struct bch_fs *c)
 {
        struct bch_sb_field_clean *clean = NULL;
@@ -809,24 +605,14 @@ int bch2_fs_recovery(struct bch_fs *c)
                goto err;
        }
 
-       if (c->opts.fsck && c->opts.norecovery) {
-               bch_err(c, "cannot select both norecovery and fsck");
-               ret = -EINVAL;
-               goto err;
-       }
+       if (c->opts.norecovery)
+               c->opts.recovery_pass_last = BCH_RECOVERY_PASS_journal_replay - 1;
 
        if (!c->opts.nochanges) {
                mutex_lock(&c->sb_lock);
+               struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
                bool write_sb = false;
 
-               struct bch_sb_field_ext *ext =
-                       bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
-               if (!ext) {
-                       ret = -BCH_ERR_ENOSPC_sb;
-                       mutex_unlock(&c->sb_lock);
-                       goto err;
-               }
-
                if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
                        ext->recovery_passes_required[0] |=
                                cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
@@ -885,7 +671,7 @@ int bch2_fs_recovery(struct bch_fs *c)
                goto err;
        }
 
-       if (!c->sb.clean || c->opts.fsck || c->opts.keep_journal) {
+       if (!c->sb.clean || c->opts.fsck || c->opts.retain_recovery_info) {
                struct genradix_iter iter;
                struct journal_replay **i;
 
@@ -965,7 +751,7 @@ use_clean:
        c->journal_replay_seq_end       = blacklist_seq - 1;
 
        if (c->opts.reconstruct_alloc)
-               do_reconstruct_alloc(c);
+               bch2_reconstruct_alloc(c);
 
        zero_out_btree_mem_ptr(&c->journal_keys);
 
@@ -1017,6 +803,12 @@ use_clean:
 
        clear_bit(BCH_FS_fsck_running, &c->flags);
 
+       /* fsync if we fixed errors */
+       if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
+               bch2_journal_flush_all_pins(&c->journal);
+               bch2_journal_meta(&c->journal);
+       }
+
        /* If we fixed errors, verify that fs is actually clean now: */
        if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
            test_bit(BCH_FS_errors_fixed, &c->flags) &&
@@ -1051,6 +843,7 @@ use_clean:
        }
 
        mutex_lock(&c->sb_lock);
+       struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
        bool write_sb = false;
 
        if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
@@ -1064,15 +857,18 @@ use_clean:
                write_sb = true;
        }
 
-       if (!test_bit(BCH_FS_error, &c->flags)) {
-               struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
-               if (ext &&
-                   (!bch2_is_zero(ext->recovery_passes_required, sizeof(ext->recovery_passes_required)) ||
-                    !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent)))) {
-                       memset(ext->recovery_passes_required, 0, sizeof(ext->recovery_passes_required));
-                       memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
-                       write_sb = true;
-               }
+       if (!test_bit(BCH_FS_error, &c->flags) &&
+           !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) {
+               memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
+               write_sb = true;
+       }
+
+       if (c->opts.fsck &&
+           !test_bit(BCH_FS_error, &c->flags) &&
+           c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
+           ext->btrees_lost_data) {
+               ext->btrees_lost_data = 0;
+               write_sb = true;
        }
 
        if (c->opts.fsck &&
@@ -1113,9 +909,10 @@ use_clean:
 out:
        bch2_flush_fsck_errs(c);
 
-       if (!c->opts.keep_journal &&
-           test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags))
+       if (!c->opts.retain_recovery_info) {
                bch2_journal_keys_put_initial(c);
+               bch2_find_btree_nodes_exit(&c->found_btree_nodes);
+       }
        kfree(clean);
 
        if (!ret &&
@@ -1141,6 +938,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        int ret;
 
        bch_notice(c, "initializing new filesystem");
+       set_bit(BCH_FS_new_fs, &c->flags);
 
        mutex_lock(&c->sb_lock);
        c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
@@ -1155,11 +953,11 @@ int bch2_fs_initialize(struct bch_fs *c)
        }
        mutex_unlock(&c->sb_lock);
 
-       c->curr_recovery_pass = ARRAY_SIZE(recovery_pass_fns);
+       c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
        set_bit(BCH_FS_may_go_rw, &c->flags);
 
        for (unsigned i = 0; i < BTREE_ID_NR; i++)
-               bch2_btree_root_alloc(c, i);
+               bch2_btree_root_alloc_fake(c, i, 0);
 
        for_each_member_device(c, ca)
                bch2_dev_usage_init(ca);
@@ -1230,7 +1028,7 @@ int bch2_fs_initialize(struct bch_fs *c)
        if (ret)
                goto err;
 
-       c->recovery_pass_done = ARRAY_SIZE(recovery_pass_fns) - 1;
+       c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
 
        if (enabled_qtypes(c)) {
                ret = bch2_fs_quota_read(c);
index 4e9d24719b2e85c356fa88a0bd3923c3a2ff30cc..4bf818de1f2feb1f6010eaff3a8eccbbf1e6d2c6 100644 (file)
@@ -2,37 +2,9 @@
 #ifndef _BCACHEFS_RECOVERY_H
 #define _BCACHEFS_RECOVERY_H
 
-extern const char * const bch2_recovery_passes[];
+void bch2_btree_lost_data(struct bch_fs *, enum btree_id);
 
-u64 bch2_recovery_passes_to_stable(u64 v);
-u64 bch2_recovery_passes_from_stable(u64 v);
-
-/*
- * For when we need to rewind recovery passes and run a pass we skipped:
- */
-static inline int bch2_run_explicit_recovery_pass(struct bch_fs *c,
-                                                 enum bch_recovery_pass pass)
-{
-       if (c->recovery_passes_explicit & BIT_ULL(pass))
-               return 0;
-
-       bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
-                bch2_recovery_passes[pass], pass,
-                bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
-
-       c->recovery_passes_explicit |= BIT_ULL(pass);
-
-       if (c->curr_recovery_pass >= pass) {
-               c->curr_recovery_pass = pass;
-               c->recovery_passes_complete &= (1ULL << pass) >> 1;
-               return -BCH_ERR_restart_recovery;
-       } else {
-               return 0;
-       }
-}
-
-int bch2_run_online_recovery_passes(struct bch_fs *);
-u64 bch2_fsck_recovery_passes(void);
+int bch2_journal_replay(struct bch_fs *);
 
 int bch2_fs_recovery(struct bch_fs *);
 int bch2_fs_initialize(struct bch_fs *);
diff --git a/fs/bcachefs/recovery_passes.c b/fs/bcachefs/recovery_passes.c
new file mode 100644 (file)
index 0000000..cb50146
--- /dev/null
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "alloc_background.h"
+#include "backpointers.h"
+#include "btree_gc.h"
+#include "btree_node_scan.h"
+#include "ec.h"
+#include "fsck.h"
+#include "inode.h"
+#include "journal.h"
+#include "lru.h"
+#include "logged_ops.h"
+#include "rebalance.h"
+#include "recovery.h"
+#include "recovery_passes.h"
+#include "snapshot.h"
+#include "subvolume.h"
+#include "super.h"
+#include "super-io.h"
+
+const char * const bch2_recovery_passes[] = {
+#define x(_fn, ...)    #_fn,
+       BCH_RECOVERY_PASSES()
+#undef x
+       NULL
+};
+
+static int bch2_check_allocations(struct bch_fs *c)
+{
+       return bch2_gc(c, true, false);
+}
+
+static int bch2_set_may_go_rw(struct bch_fs *c)
+{
+       struct journal_keys *keys = &c->journal_keys;
+
+       /*
+        * After we go RW, the journal keys buffer can't be modified (except for
+        * setting journal_key->overwritten: it will be accessed by multiple
+        * threads
+        */
+       move_gap(keys, keys->nr);
+
+       set_bit(BCH_FS_may_go_rw, &c->flags);
+
+       if (keys->nr || c->opts.fsck || !c->sb.clean)
+               return bch2_fs_read_write_early(c);
+       return 0;
+}
+
+struct recovery_pass_fn {
+       int             (*fn)(struct bch_fs *);
+       unsigned        when;
+};
+
+static struct recovery_pass_fn recovery_pass_fns[] = {
+#define x(_fn, _id, _when)     { .fn = bch2_##_fn, .when = _when },
+       BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static const u8 passes_to_stable_map[] = {
+#define x(n, id, ...)  [BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
+       BCH_RECOVERY_PASSES()
+#undef x
+};
+
+static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
+{
+       return passes_to_stable_map[pass];
+}
+
+u64 bch2_recovery_passes_to_stable(u64 v)
+{
+       u64 ret = 0;
+       for (unsigned i = 0; i < ARRAY_SIZE(passes_to_stable_map); i++)
+               if (v & BIT_ULL(i))
+                       ret |= BIT_ULL(passes_to_stable_map[i]);
+       return ret;
+}
+
+u64 bch2_recovery_passes_from_stable(u64 v)
+{
+       static const u8 map[] = {
+#define x(n, id, ...)  [BCH_RECOVERY_PASS_STABLE_##n] = BCH_RECOVERY_PASS_##n,
+       BCH_RECOVERY_PASSES()
+#undef x
+       };
+
+       u64 ret = 0;
+       for (unsigned i = 0; i < ARRAY_SIZE(map); i++)
+               if (v & BIT_ULL(i))
+                       ret |= BIT_ULL(map[i]);
+       return ret;
+}
+
+/*
+ * For when we need to rewind recovery passes and run a pass we skipped:
+ */
+int bch2_run_explicit_recovery_pass(struct bch_fs *c,
+                                   enum bch_recovery_pass pass)
+{
+       if (c->recovery_passes_explicit & BIT_ULL(pass))
+               return 0;
+
+       bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
+                bch2_recovery_passes[pass], pass,
+                bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
+
+       c->recovery_passes_explicit |= BIT_ULL(pass);
+
+       if (c->curr_recovery_pass >= pass) {
+               c->curr_recovery_pass = pass;
+               c->recovery_passes_complete &= (1ULL << pass) >> 1;
+               return -BCH_ERR_restart_recovery;
+       } else {
+               return 0;
+       }
+}
+
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
+                                              enum bch_recovery_pass pass)
+{
+       enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+
+       mutex_lock(&c->sb_lock);
+       struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+       if (!test_bit_le64(s, ext->recovery_passes_required)) {
+               __set_bit_le64(s, ext->recovery_passes_required);
+               bch2_write_super(c);
+       }
+       mutex_unlock(&c->sb_lock);
+
+       return bch2_run_explicit_recovery_pass(c, pass);
+}
+
+static void bch2_clear_recovery_pass_required(struct bch_fs *c,
+                                             enum bch_recovery_pass pass)
+{
+       enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
+
+       mutex_lock(&c->sb_lock);
+       struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
+
+       if (test_bit_le64(s, ext->recovery_passes_required)) {
+               __clear_bit_le64(s, ext->recovery_passes_required);
+               bch2_write_super(c);
+       }
+       mutex_unlock(&c->sb_lock);
+}
+
+u64 bch2_fsck_recovery_passes(void)
+{
+       u64 ret = 0;
+
+       for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
+               if (recovery_pass_fns[i].when & PASS_FSCK)
+                       ret |= BIT_ULL(i);
+       return ret;
+}
+
+static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+       struct recovery_pass_fn *p = recovery_pass_fns + pass;
+
+       if (c->recovery_passes_explicit & BIT_ULL(pass))
+               return true;
+       if ((p->when & PASS_FSCK) && c->opts.fsck)
+               return true;
+       if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
+               return true;
+       if (p->when & PASS_ALWAYS)
+               return true;
+       return false;
+}
+
+static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
+{
+       struct recovery_pass_fn *p = recovery_pass_fns + pass;
+       int ret;
+
+       if (!(p->when & PASS_SILENT))
+               bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
+                          bch2_recovery_passes[pass]);
+       ret = p->fn(c);
+       if (ret)
+               return ret;
+       if (!(p->when & PASS_SILENT))
+               bch2_print(c, KERN_CONT " done\n");
+
+       return 0;
+}
+
+int bch2_run_online_recovery_passes(struct bch_fs *c)
+{
+       int ret = 0;
+
+       for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
+               struct recovery_pass_fn *p = recovery_pass_fns + i;
+
+               if (!(p->when & PASS_ONLINE))
+                       continue;
+
+               ret = bch2_run_recovery_pass(c, i);
+               if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
+                       i = c->curr_recovery_pass;
+                       continue;
+               }
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
+int bch2_run_recovery_passes(struct bch_fs *c)
+{
+       int ret = 0;
+
+       while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
+               if (c->opts.recovery_pass_last &&
+                   c->curr_recovery_pass > c->opts.recovery_pass_last)
+                       break;
+
+               if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
+                       unsigned pass = c->curr_recovery_pass;
+
+                       ret = bch2_run_recovery_pass(c, c->curr_recovery_pass);
+                       if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
+                           (ret && c->curr_recovery_pass < pass))
+                               continue;
+                       if (ret)
+                               break;
+
+                       c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
+               }
+
+               c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
+
+               if (!test_bit(BCH_FS_error, &c->flags))
+                       bch2_clear_recovery_pass_required(c, c->curr_recovery_pass);
+
+               c->curr_recovery_pass++;
+       }
+
+       return ret;
+}
diff --git a/fs/bcachefs/recovery_passes.h b/fs/bcachefs/recovery_passes.h
new file mode 100644 (file)
index 0000000..99b464e
--- /dev/null
@@ -0,0 +1,17 @@
+#ifndef _BCACHEFS_RECOVERY_PASSES_H
+#define _BCACHEFS_RECOVERY_PASSES_H
+
+extern const char * const bch2_recovery_passes[];
+
+u64 bch2_recovery_passes_to_stable(u64 v);
+u64 bch2_recovery_passes_from_stable(u64 v);
+
+u64 bch2_fsck_recovery_passes(void);
+
+int bch2_run_explicit_recovery_pass(struct bch_fs *, enum bch_recovery_pass);
+int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *, enum bch_recovery_pass);
+
+int bch2_run_online_recovery_passes(struct bch_fs *);
+int bch2_run_recovery_passes(struct bch_fs *);
+
+#endif /* _BCACHEFS_RECOVERY_PASSES_H */
similarity index 91%
rename from fs/bcachefs/recovery_types.h
rename to fs/bcachefs/recovery_passes_types.h
index 4959e95e7c74654e8b3a6e78a0ea7778713bd8ba..773aea9a0080fd6e6105ed4f9e8a91fde1d125ee 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _BCACHEFS_RECOVERY_TYPES_H
-#define _BCACHEFS_RECOVERY_TYPES_H
+#ifndef _BCACHEFS_RECOVERY_PASSES_TYPES_H
+#define _BCACHEFS_RECOVERY_PASSES_TYPES_H
 
 #define PASS_SILENT            BIT(0)
 #define PASS_FSCK              BIT(1)
@@ -13,6 +13,7 @@
  * must never change:
  */
 #define BCH_RECOVERY_PASSES()                                                  \
+       x(scan_for_btree_nodes,                 37, 0)                          \
        x(check_topology,                        4, 0)                          \
        x(alloc_read,                            0, PASS_ALWAYS)                \
        x(stripes_read,                          1, PASS_ALWAYS)                \
        x(check_alloc_to_lru_refs,              15, PASS_ONLINE|PASS_FSCK)      \
        x(fs_freespace_init,                    16, PASS_ALWAYS|PASS_SILENT)    \
        x(bucket_gens_init,                     17, 0)                          \
+       x(reconstruct_snapshots,                38, 0)                          \
        x(check_snapshot_trees,                 18, PASS_ONLINE|PASS_FSCK)      \
        x(check_snapshots,                      19, PASS_ONLINE|PASS_FSCK)      \
        x(check_subvols,                        20, PASS_ONLINE|PASS_FSCK)      \
        x(check_subvol_children,                35, PASS_ONLINE|PASS_FSCK)      \
        x(delete_dead_snapshots,                21, PASS_ONLINE|PASS_FSCK)      \
        x(fs_upgrade_for_subvolumes,            22, 0)                          \
-       x(resume_logged_ops,                    23, PASS_ALWAYS)                \
        x(check_inodes,                         24, PASS_FSCK)                  \
        x(check_extents,                        25, PASS_FSCK)                  \
        x(check_indirect_extents,               26, PASS_FSCK)                  \
@@ -47,6 +48,7 @@
        x(check_subvolume_structure,            36, PASS_ONLINE|PASS_FSCK)      \
        x(check_directory_structure,            30, PASS_ONLINE|PASS_FSCK)      \
        x(check_nlinks,                         31, PASS_FSCK)                  \
+       x(resume_logged_ops,                    23, PASS_ALWAYS)                \
        x(delete_dead_inodes,                   32, PASS_FSCK|PASS_UNCLEAN)     \
        x(fix_reflink_p,                        33, 0)                          \
        x(set_fs_needs_rebalance,               34, 0)                          \
@@ -56,6 +58,7 @@ enum bch_recovery_pass {
 #define x(n, id, when) BCH_RECOVERY_PASS_##n,
        BCH_RECOVERY_PASSES()
 #undef x
+       BCH_RECOVERY_PASS_NR
 };
 
 /* But we also need stable identifiers that can be used in the superblock */
@@ -65,4 +68,4 @@ enum bch_recovery_pass_stable {
 #undef x
 };
 
-#endif /* _BCACHEFS_RECOVERY_TYPES_H */
+#endif /* _BCACHEFS_RECOVERY_PASSES_TYPES_H */
index c47c66c2b394dc8df391fa3adf8bfea03e1e447e..ff7864731a073d7e02356331da5767eaceb8b825 100644 (file)
@@ -185,8 +185,7 @@ not_found:
                } else {
                        bkey_error_init(update);
                        update->k.p             = p.k->p;
-                       update->k.p.offset      = next_idx;
-                       update->k.size          = next_idx - *idx;
+                       update->k.size          = p.k->size;
                        set_bkey_val_u64s(&update->k, 0);
                }
 
index cc2672c120312c39f82e9a1a9afe0ed959b15dba..678b9c20e2514b12fec66052510775142119620f 100644 (file)
@@ -6,12 +6,15 @@
 #include "replicas.h"
 #include "super-io.h"
 
+#include <linux/sort.h>
+
 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
                                            struct bch_replicas_cpu *);
 
 /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
-static int bch2_memcmp(const void *l, const void *r, size_t size)
+static int bch2_memcmp(const void *l, const void *r,  const void *priv)
 {
+       size_t size = (size_t) priv;
        return memcmp(l, r, size);
 }
 
@@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
 
 static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
 {
-       eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
+       eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
+                         bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
 }
 
 static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
@@ -228,7 +232,7 @@ static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
 
        verify_replicas_entry(search);
 
-#define entry_cmp(_l, _r, size)        memcmp(_l, _r, entry_size)
+#define entry_cmp(_l, _r)      memcmp(_l, _r, entry_size)
        idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
                              entry_cmp, search);
 #undef entry_cmp
@@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
 {
        unsigned i;
 
-       sort_cmp_size(cpu_r->entries,
-                     cpu_r->nr,
-                     cpu_r->entry_size,
-                     bch2_memcmp, NULL);
+       sort_r(cpu_r->entries,
+              cpu_r->nr,
+              cpu_r->entry_size,
+              bch2_memcmp, NULL,
+              (void *)(size_t)cpu_r->entry_size);
 
        for (i = 0; i < cpu_r->nr; i++) {
                struct bch_replicas_entry_v1 *e =
index e4396cb0bacb037bac965e1beccd261d4a960789..d6f81179c3a29b6e884f92c94628d512626c6b45 100644 (file)
@@ -7,7 +7,7 @@
 
 #include "bcachefs.h"
 #include "darray.h"
-#include "recovery.h"
+#include "recovery_passes.h"
 #include "sb-downgrade.h"
 #include "sb-errors.h"
 #include "super-io.h"
index 5178bf579f7c538b6f1132fc687aa281177c7db9..d7d609131030a817c5fa2867fc3cee5796fb898c 100644 (file)
        x(subvol_children_bad,                                  257)    \
        x(subvol_loop,                                          258)    \
        x(subvol_unreachable,                                   259)    \
-       x(btree_node_bkey_bad_u64s,                             260)
+       x(btree_node_bkey_bad_u64s,                             260)    \
+       x(btree_node_topology_empty_interior_node,              261)    \
+       x(btree_ptr_v2_min_key_bad,                             262)    \
+       x(btree_root_unreadable_and_scan_found_nothing,         263)    \
+       x(snapshot_node_missing,                                264)    \
+       x(dup_backpointer_to_bad_csum_extent,                   265)
 
 enum bch_sb_error_id {
 #define x(t, n) BCH_FSCK_ERR_##t = n,
index 39debe814bf392acb76c7cebe6752736d6c57cff..0e806f04f3d7c5117ade3d612b1c851da243aead 100644 (file)
@@ -8,6 +8,7 @@
 #include "errcode.h"
 #include "error.h"
 #include "fs.h"
+#include "recovery_passes.h"
 #include "snapshot.h"
 
 #include <linux/random.h>
@@ -93,8 +94,10 @@ static int bch2_snapshot_tree_create(struct btree_trans *trans,
 
 static bool __bch2_snapshot_is_ancestor_early(struct snapshot_table *t, u32 id, u32 ancestor)
 {
-       while (id && id < ancestor)
-               id = __snapshot_t(t, id)->parent;
+       while (id && id < ancestor) {
+               const struct snapshot_t *s = __snapshot_t(t, id);
+               id = s ? s->parent : 0;
+       }
        return id == ancestor;
 }
 
@@ -110,6 +113,8 @@ static bool bch2_snapshot_is_ancestor_early(struct bch_fs *c, u32 id, u32 ancest
 static inline u32 get_ancestor_below(struct snapshot_table *t, u32 id, u32 ancestor)
 {
        const struct snapshot_t *s = __snapshot_t(t, id);
+       if (!s)
+               return 0;
 
        if (s->skip[2] <= ancestor)
                return s->skip[2];
@@ -127,7 +132,7 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
        rcu_read_lock();
        struct snapshot_table *t = rcu_dereference(c->snapshots);
 
-       if (unlikely(c->recovery_pass_done <= BCH_RECOVERY_PASS_check_snapshots)) {
+       if (unlikely(c->recovery_pass_done < BCH_RECOVERY_PASS_check_snapshots)) {
                ret = __bch2_snapshot_is_ancestor_early(t, id, ancestor);
                goto out;
        }
@@ -151,36 +156,39 @@ out:
 static noinline struct snapshot_t *__snapshot_t_mut(struct bch_fs *c, u32 id)
 {
        size_t idx = U32_MAX - id;
-       size_t new_size;
        struct snapshot_table *new, *old;
 
-       new_size = max(16UL, roundup_pow_of_two(idx + 1));
+       size_t new_bytes = kmalloc_size_roundup(struct_size(new, s, idx + 1));
+       size_t new_size = (new_bytes - sizeof(*new)) / sizeof(new->s[0]);
 
-       new = kvzalloc(struct_size(new, s, new_size), GFP_KERNEL);
+       new = kvzalloc(new_bytes, GFP_KERNEL);
        if (!new)
                return NULL;
 
+       new->nr = new_size;
+
        old = rcu_dereference_protected(c->snapshots, true);
        if (old)
-               memcpy(new->s,
-                      rcu_dereference_protected(c->snapshots, true)->s,
-                      sizeof(new->s[0]) * c->snapshot_table_size);
+               memcpy(new->s, old->s, sizeof(old->s[0]) * old->nr);
 
        rcu_assign_pointer(c->snapshots, new);
-       c->snapshot_table_size = new_size;
-       kvfree_rcu_mightsleep(old);
+       kvfree_rcu(old, rcu);
 
-       return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+       return &rcu_dereference_protected(c->snapshots,
+                               lockdep_is_held(&c->snapshot_table_lock))->s[idx];
 }
 
 static inline struct snapshot_t *snapshot_t_mut(struct bch_fs *c, u32 id)
 {
        size_t idx = U32_MAX - id;
+       struct snapshot_table *table =
+               rcu_dereference_protected(c->snapshots,
+                               lockdep_is_held(&c->snapshot_table_lock));
 
        lockdep_assert_held(&c->snapshot_table_lock);
 
-       if (likely(idx < c->snapshot_table_size))
-               return &rcu_dereference_protected(c->snapshots, true)->s[idx];
+       if (likely(table && idx < table->nr))
+               return &table->s[idx];
 
        return __snapshot_t_mut(c, id);
 }
@@ -567,6 +575,13 @@ static int check_snapshot_tree(struct btree_trans *trans,
                u32 subvol_id;
 
                ret = bch2_snapshot_tree_master_subvol(trans, root_id, &subvol_id);
+               bch_err_fn(c, ret);
+
+               if (bch2_err_matches(ret, ENOENT)) { /* nothing to be done here */
+                       ret = 0;
+                       goto err;
+               }
+
                if (ret)
                        goto err;
 
@@ -724,7 +739,6 @@ static int check_snapshot(struct btree_trans *trans,
        u32 parent_id = bch2_snapshot_parent_early(c, k.k->p.offset);
        u32 real_depth;
        struct printbuf buf = PRINTBUF;
-       bool should_have_subvol;
        u32 i, id;
        int ret = 0;
 
@@ -770,7 +784,7 @@ static int check_snapshot(struct btree_trans *trans,
                }
        }
 
-       should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
+       bool should_have_subvol = BCH_SNAPSHOT_SUBVOL(&s) &&
                !BCH_SNAPSHOT_DELETED(&s);
 
        if (should_have_subvol) {
@@ -872,6 +886,154 @@ int bch2_check_snapshots(struct bch_fs *c)
        return ret;
 }
 
+static int check_snapshot_exists(struct btree_trans *trans, u32 id)
+{
+       struct bch_fs *c = trans->c;
+
+       if (bch2_snapshot_equiv(c, id))
+               return 0;
+
+       u32 tree_id;
+       int ret = bch2_snapshot_tree_create(trans, id, 0, &tree_id);
+       if (ret)
+               return ret;
+
+       struct bkey_i_snapshot *snapshot = bch2_trans_kmalloc(trans, sizeof(*snapshot));
+       ret = PTR_ERR_OR_ZERO(snapshot);
+       if (ret)
+               return ret;
+
+       bkey_snapshot_init(&snapshot->k_i);
+       snapshot->k.p           = POS(0, id);
+       snapshot->v.tree        = cpu_to_le32(tree_id);
+       snapshot->v.btime.lo    = cpu_to_le64(bch2_current_time(c));
+
+       return  bch2_btree_insert_trans(trans, BTREE_ID_snapshots, &snapshot->k_i, 0) ?:
+               bch2_mark_snapshot(trans, BTREE_ID_snapshots, 0,
+                                  bkey_s_c_null, bkey_i_to_s(&snapshot->k_i), 0) ?:
+               bch2_snapshot_set_equiv(trans, bkey_i_to_s_c(&snapshot->k_i));
+}
+
+/* Figure out which snapshot nodes belong in the same tree: */
+struct snapshot_tree_reconstruct {
+       enum btree_id                   btree;
+       struct bpos                     cur_pos;
+       snapshot_id_list                cur_ids;
+       DARRAY(snapshot_id_list)        trees;
+};
+
+static void snapshot_tree_reconstruct_exit(struct snapshot_tree_reconstruct *r)
+{
+       darray_for_each(r->trees, i)
+               darray_exit(i);
+       darray_exit(&r->trees);
+       darray_exit(&r->cur_ids);
+}
+
+static inline bool same_snapshot(struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+       return r->btree == BTREE_ID_inodes
+               ? r->cur_pos.offset == pos.offset
+               : r->cur_pos.inode == pos.inode;
+}
+
+static inline bool snapshot_id_lists_have_common(snapshot_id_list *l, snapshot_id_list *r)
+{
+       darray_for_each(*l, i)
+               if (snapshot_list_has_id(r, *i))
+                       return true;
+       return false;
+}
+
+static void snapshot_id_list_to_text(struct printbuf *out, snapshot_id_list *s)
+{
+       bool first = true;
+       darray_for_each(*s, i) {
+               if (!first)
+                       prt_char(out, ' ');
+               first = false;
+               prt_printf(out, "%u", *i);
+       }
+}
+
+static int snapshot_tree_reconstruct_next(struct bch_fs *c, struct snapshot_tree_reconstruct *r)
+{
+       if (r->cur_ids.nr) {
+               darray_for_each(r->trees, i)
+                       if (snapshot_id_lists_have_common(i, &r->cur_ids)) {
+                               int ret = snapshot_list_merge(c, i, &r->cur_ids);
+                               if (ret)
+                                       return ret;
+                               goto out;
+                       }
+               darray_push(&r->trees, r->cur_ids);
+               darray_init(&r->cur_ids);
+       }
+out:
+       r->cur_ids.nr = 0;
+       return 0;
+}
+
+static int get_snapshot_trees(struct bch_fs *c, struct snapshot_tree_reconstruct *r, struct bpos pos)
+{
+       if (!same_snapshot(r, pos))
+               snapshot_tree_reconstruct_next(c, r);
+       r->cur_pos = pos;
+       return snapshot_list_add_nodup(c, &r->cur_ids, pos.snapshot);
+}
+
+int bch2_reconstruct_snapshots(struct bch_fs *c)
+{
+       struct btree_trans *trans = bch2_trans_get(c);
+       struct printbuf buf = PRINTBUF;
+       struct snapshot_tree_reconstruct r = {};
+       int ret = 0;
+
+       for (unsigned btree = 0; btree < BTREE_ID_NR; btree++) {
+               if (btree_type_has_snapshots(btree)) {
+                       r.btree = btree;
+
+                       ret = for_each_btree_key(trans, iter, btree, POS_MIN,
+                                       BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_PREFETCH, k, ({
+                               get_snapshot_trees(c, &r, k.k->p);
+                       }));
+                       if (ret)
+                               goto err;
+
+                       snapshot_tree_reconstruct_next(c, &r);
+               }
+       }
+
+       darray_for_each(r.trees, t) {
+               printbuf_reset(&buf);
+               snapshot_id_list_to_text(&buf, t);
+
+               darray_for_each(*t, id) {
+                       if (fsck_err_on(!bch2_snapshot_equiv(c, *id),
+                                       c, snapshot_node_missing,
+                                       "snapshot node %u from tree %s missing", *id, buf.buf)) {
+                               if (t->nr > 1) {
+                                       bch_err(c, "cannot reconstruct snapshot trees with multiple nodes");
+                                       ret = -BCH_ERR_fsck_repair_unimplemented;
+                                       goto err;
+                               }
+
+                               ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
+                                               check_snapshot_exists(trans, *id));
+                               if (ret)
+                                       goto err;
+                       }
+               }
+       }
+fsck_err:
+err:
+       bch2_trans_put(trans);
+       snapshot_tree_reconstruct_exit(&r);
+       printbuf_exit(&buf);
+       bch_err_fn(c, ret);
+       return ret;
+}
+
 /*
  * Mark a snapshot as deleted, for future cleanup:
  */
@@ -1682,6 +1844,20 @@ int bch2_snapshots_read(struct bch_fs *c)
                                   POS_MIN, 0, k,
                           (set_is_ancestor_bitmap(c, k.k->p.offset), 0)));
        bch_err_fn(c, ret);
+
+       /*
+        * It's important that we check if we need to reconstruct snapshots
+        * before going RW, so we mark that pass as required in the superblock -
+        * otherwise, we could end up deleting keys with missing snapshot nodes
+        * instead
+        */
+       BUG_ON(!test_bit(BCH_FS_new_fs, &c->flags) &&
+              test_bit(BCH_FS_may_go_rw, &c->flags));
+
+       if (bch2_err_matches(ret, EIO) ||
+           (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots)))
+               ret = bch2_run_explicit_recovery_pass_persistent(c, BCH_RECOVERY_PASS_reconstruct_snapshots);
+
        return ret;
 }
 
index 7c66ffc06385ddea63685298f691660d906055d5..b7d2fed37c4f31167fe036bb9967ac084c733edf 100644 (file)
@@ -33,7 +33,11 @@ int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned,
 
 static inline struct snapshot_t *__snapshot_t(struct snapshot_table *t, u32 id)
 {
-       return &t->s[U32_MAX - id];
+       u32 idx = U32_MAX - id;
+
+       return likely(t && idx < t->nr)
+               ? &t->s[idx]
+               : NULL;
 }
 
 static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
@@ -44,7 +48,8 @@ static inline const struct snapshot_t *snapshot_t(struct bch_fs *c, u32 id)
 static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
 {
        rcu_read_lock();
-       id = snapshot_t(c, id)->tree;
+       const struct snapshot_t *s = snapshot_t(c, id);
+       id = s ? s->tree : 0;
        rcu_read_unlock();
 
        return id;
@@ -52,7 +57,8 @@ static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
 
 static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
 {
-       return snapshot_t(c, id)->parent;
+       const struct snapshot_t *s = snapshot_t(c, id);
+       return s ? s->parent : 0;
 }
 
 static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
@@ -66,19 +72,19 @@ static inline u32 bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
 
 static inline u32 __bch2_snapshot_parent(struct bch_fs *c, u32 id)
 {
-#ifdef CONFIG_BCACHEFS_DEBUG
-       u32 parent = snapshot_t(c, id)->parent;
+       const struct snapshot_t *s = snapshot_t(c, id);
+       if (!s)
+               return 0;
 
-       if (parent &&
-           snapshot_t(c, id)->depth != snapshot_t(c, parent)->depth + 1)
+       u32 parent = s->parent;
+       if (IS_ENABLED(CONFIG_BCACHEFS_DEBU) &&
+           parent &&
+           s->depth != snapshot_t(c, parent)->depth + 1)
                panic("id %u depth=%u parent %u depth=%u\n",
                      id, snapshot_t(c, id)->depth,
                      parent, snapshot_t(c, parent)->depth);
 
        return parent;
-#else
-       return snapshot_t(c, id)->parent;
-#endif
 }
 
 static inline u32 bch2_snapshot_parent(struct bch_fs *c, u32 id)
@@ -116,7 +122,8 @@ static inline u32 bch2_snapshot_root(struct bch_fs *c, u32 id)
 
 static inline u32 __bch2_snapshot_equiv(struct bch_fs *c, u32 id)
 {
-       return snapshot_t(c, id)->equiv;
+       const struct snapshot_t *s = snapshot_t(c, id);
+       return s ? s->equiv : 0;
 }
 
 static inline u32 bch2_snapshot_equiv(struct bch_fs *c, u32 id)
@@ -133,38 +140,22 @@ static inline bool bch2_snapshot_is_equiv(struct bch_fs *c, u32 id)
        return id == bch2_snapshot_equiv(c, id);
 }
 
-static inline bool bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
+static inline int bch2_snapshot_is_internal_node(struct bch_fs *c, u32 id)
 {
-       const struct snapshot_t *s;
-       bool ret;
-
        rcu_read_lock();
-       s = snapshot_t(c, id);
-       ret = s->children[0];
+       const struct snapshot_t *s = snapshot_t(c, id);
+       int ret = s ? s->children[0] : -BCH_ERR_invalid_snapshot_node;
        rcu_read_unlock();
 
        return ret;
 }
 
-static inline u32 bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
-{
-       return !bch2_snapshot_is_internal_node(c, id);
-}
-
-static inline u32 bch2_snapshot_sibling(struct bch_fs *c, u32 id)
+static inline int bch2_snapshot_is_leaf(struct bch_fs *c, u32 id)
 {
-       const struct snapshot_t *s;
-       u32 parent = __bch2_snapshot_parent(c, id);
-
-       if (!parent)
-               return 0;
-
-       s = snapshot_t(c, __bch2_snapshot_parent(c, id));
-       if (id == s->children[0])
-               return s->children[1];
-       if (id == s->children[1])
-               return s->children[0];
-       return 0;
+       int ret = bch2_snapshot_is_internal_node(c, id);
+       if (ret < 0)
+               return ret;
+       return !ret;
 }
 
 static inline u32 bch2_snapshot_depth(struct bch_fs *c, u32 parent)
@@ -218,15 +209,34 @@ static inline bool snapshot_list_has_ancestor(struct bch_fs *c, snapshot_id_list
 
 static inline int snapshot_list_add(struct bch_fs *c, snapshot_id_list *s, u32 id)
 {
-       int ret;
-
        BUG_ON(snapshot_list_has_id(s, id));
-       ret = darray_push(s, id);
+       int ret = darray_push(s, id);
        if (ret)
                bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
        return ret;
 }
 
+static inline int snapshot_list_add_nodup(struct bch_fs *c, snapshot_id_list *s, u32 id)
+{
+       int ret = snapshot_list_has_id(s, id)
+               ? 0
+               : darray_push(s, id);
+       if (ret)
+               bch_err(c, "error reallocating snapshot_id_list (size %zu)", s->size);
+       return ret;
+}
+
+static inline int snapshot_list_merge(struct bch_fs *c, snapshot_id_list *dst, snapshot_id_list *src)
+{
+       darray_for_each(*src, i) {
+               int ret = snapshot_list_add_nodup(c, dst, *i);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
 int bch2_snapshot_lookup(struct btree_trans *trans, u32 id,
                         struct bch_snapshot *s);
 int bch2_snapshot_get_subvol(struct btree_trans *, u32,
@@ -238,6 +248,7 @@ int bch2_snapshot_node_create(struct btree_trans *, u32,
 
 int bch2_check_snapshot_trees(struct bch_fs *);
 int bch2_check_snapshots(struct bch_fs *);
+int bch2_reconstruct_snapshots(struct bch_fs *);
 
 int bch2_snapshot_node_set_deleted(struct btree_trans *, u32);
 void bch2_delete_dead_snapshots_work(struct work_struct *);
@@ -249,7 +260,7 @@ static inline int bch2_key_has_snapshot_overwrites(struct btree_trans *trans,
                                          struct bpos pos)
 {
        if (!btree_type_has_snapshots(id) ||
-           bch2_snapshot_is_leaf(trans->c, pos.snapshot))
+           bch2_snapshot_is_leaf(trans->c, pos.snapshot) > 0)
                return 0;
 
        return __bch2_key_has_snapshot_overwrites(trans, id, pos);
index ce7aed12194238071f8fbf37aa111160ced286c9..88a79c82327687001dddbc9111ab93e920b7c3c5 100644 (file)
@@ -595,6 +595,78 @@ err:
        return ret;
 }
 
+int bch2_initialize_subvolumes(struct bch_fs *c)
+{
+       struct bkey_i_snapshot_tree     root_tree;
+       struct bkey_i_snapshot          root_snapshot;
+       struct bkey_i_subvolume         root_volume;
+       int ret;
+
+       bkey_snapshot_tree_init(&root_tree.k_i);
+       root_tree.k.p.offset            = 1;
+       root_tree.v.master_subvol       = cpu_to_le32(1);
+       root_tree.v.root_snapshot       = cpu_to_le32(U32_MAX);
+
+       bkey_snapshot_init(&root_snapshot.k_i);
+       root_snapshot.k.p.offset = U32_MAX;
+       root_snapshot.v.flags   = 0;
+       root_snapshot.v.parent  = 0;
+       root_snapshot.v.subvol  = cpu_to_le32(BCACHEFS_ROOT_SUBVOL);
+       root_snapshot.v.tree    = cpu_to_le32(1);
+       SET_BCH_SNAPSHOT_SUBVOL(&root_snapshot.v, true);
+
+       bkey_subvolume_init(&root_volume.k_i);
+       root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL;
+       root_volume.v.flags     = 0;
+       root_volume.v.snapshot  = cpu_to_le32(U32_MAX);
+       root_volume.v.inode     = cpu_to_le64(BCACHEFS_ROOT_INO);
+
+       ret =   bch2_btree_insert(c, BTREE_ID_snapshot_trees,   &root_tree.k_i, NULL, 0) ?:
+               bch2_btree_insert(c, BTREE_ID_snapshots,        &root_snapshot.k_i, NULL, 0) ?:
+               bch2_btree_insert(c, BTREE_ID_subvolumes,       &root_volume.k_i, NULL, 0);
+       bch_err_fn(c, ret);
+       return ret;
+}
+
+static int __bch2_fs_upgrade_for_subvolumes(struct btree_trans *trans)
+{
+       struct btree_iter iter;
+       struct bkey_s_c k;
+       struct bch_inode_unpacked inode;
+       int ret;
+
+       k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+                              SPOS(0, BCACHEFS_ROOT_INO, U32_MAX), 0);
+       ret = bkey_err(k);
+       if (ret)
+               return ret;
+
+       if (!bkey_is_inode(k.k)) {
+               bch_err(trans->c, "root inode not found");
+               ret = -BCH_ERR_ENOENT_inode;
+               goto err;
+       }
+
+       ret = bch2_inode_unpack(k, &inode);
+       BUG_ON(ret);
+
+       inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
+
+       ret = bch2_inode_write(trans, &iter, &inode);
+err:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+
+/* set bi_subvol on root inode */
+int bch2_fs_upgrade_for_subvolumes(struct bch_fs *c)
+{
+       int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_lazy_rw,
+                               __bch2_fs_upgrade_for_subvolumes(trans));
+       bch_err_fn(c, ret);
+       return ret;
+}
+
 int bch2_fs_subvolumes_init(struct bch_fs *c)
 {
        INIT_WORK(&c->snapshot_delete_work, bch2_delete_dead_snapshots_work);
index 903c05162c0688ae902321aace955ca27fa5e2f9..d2015d549bd2a33102726c2f4a22cbcd2e64395d 100644 (file)
@@ -37,6 +37,9 @@ void bch2_delete_dead_snapshots_async(struct bch_fs *);
 int bch2_subvolume_unlink(struct btree_trans *, u32);
 int bch2_subvolume_create(struct btree_trans *, u64, u32, u32, u32 *, u32 *, bool);
 
+int bch2_initialize_subvolumes(struct bch_fs *);
+int bch2_fs_upgrade_for_subvolumes(struct bch_fs *);
+
 int bch2_fs_subvolumes_init(struct bch_fs *);
 
 #endif /* _BCACHEFS_SUBVOLUME_H */
index ae644adfc391680d85b6fe53c25f08ae9337e037..9b10c8947828e0d40db0a63f7d3db22457769d46 100644 (file)
@@ -20,6 +20,8 @@ struct snapshot_t {
 };
 
 struct snapshot_table {
+       struct rcu_head         rcu;
+       size_t                  nr;
 #ifndef RUST_BINDGEN
        DECLARE_FLEX_ARRAY(struct snapshot_t, s);
 #else
index ad28e370b6404c915ee8bf8743ed535366fc6a55..5eee055ee2721a3967fb31ca38adcbc5672521d6 100644 (file)
@@ -8,7 +8,7 @@
 #include "journal.h"
 #include "journal_sb.h"
 #include "journal_seq_blacklist.h"
-#include "recovery.h"
+#include "recovery_passes.h"
 #include "replicas.h"
 #include "quota.h"
 #include "sb-clean.h"
@@ -143,7 +143,7 @@ void bch2_free_super(struct bch_sb_handle *sb)
 {
        kfree(sb->bio);
        if (!IS_ERR_OR_NULL(sb->s_bdev_file))
-               fput(sb->s_bdev_file);
+               bdev_fput(sb->s_bdev_file);
        kfree(sb->holder);
        kfree(sb->sb_name);
 
@@ -527,9 +527,11 @@ static void bch2_sb_update(struct bch_fs *c)
        memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));
 
        struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
-       if (ext)
+       if (ext) {
                le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
                                    sizeof(c->sb.errors_silent) * 8);
+               c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
+       }
 
        for_each_member_device(c, ca) {
                struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
@@ -1162,6 +1164,11 @@ static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
 
                kfree(errors_silent);
        }
+
+       prt_printf(out, "Btrees with missing data:");
+       prt_tab(out);
+       prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
+       prt_newline(out);
 }
 
 static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
index 1ad6e5cd9476c86f4b905feff6f727b4cdd94a4e..ed63018f21bef58b2aa854f9c3f05ad1b3f26202 100644 (file)
@@ -15,6 +15,7 @@
 #include "btree_gc.h"
 #include "btree_journal_iter.h"
 #include "btree_key_cache.h"
+#include "btree_node_scan.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
 #include "btree_write_buffer.h"
@@ -365,7 +366,7 @@ void bch2_fs_read_only(struct bch_fs *c)
            !test_bit(BCH_FS_emergency_ro, &c->flags) &&
            test_bit(BCH_FS_started, &c->flags) &&
            test_bit(BCH_FS_clean_shutdown, &c->flags) &&
-           !c->opts.norecovery) {
+           c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay) {
                BUG_ON(c->journal.last_empty_seq != journal_cur_seq(&c->journal));
                BUG_ON(atomic_read(&c->btree_cache.dirty));
                BUG_ON(atomic_long_read(&c->btree_key_cache.nr_dirty));
@@ -510,7 +511,8 @@ err:
 
 int bch2_fs_read_write(struct bch_fs *c)
 {
-       if (c->opts.norecovery)
+       if (c->opts.recovery_pass_last &&
+           c->opts.recovery_pass_last < BCH_RECOVERY_PASS_journal_replay)
                return -BCH_ERR_erofs_norecovery;
 
        if (c->opts.nochanges)
@@ -535,6 +537,7 @@ static void __bch2_fs_free(struct bch_fs *c)
        for (i = 0; i < BCH_TIME_STAT_NR; i++)
                bch2_time_stats_exit(&c->times[i]);
 
+       bch2_find_btree_nodes_exit(&c->found_btree_nodes);
        bch2_free_pending_node_rewrites(c);
        bch2_fs_sb_errors_exit(c);
        bch2_fs_counters_exit(c);
@@ -559,6 +562,7 @@ static void __bch2_fs_free(struct bch_fs *c)
        bch2_io_clock_exit(&c->io_clock[READ]);
        bch2_fs_compress_exit(c);
        bch2_journal_keys_put_initial(c);
+       bch2_find_btree_nodes_exit(&c->found_btree_nodes);
        BUG_ON(atomic_read(&c->journal_keys.ref));
        bch2_fs_btree_write_buffer_exit(c);
        percpu_free_rwsem(&c->mark_lock);
@@ -1015,8 +1019,16 @@ int bch2_fs_start(struct bch_fs *c)
        for_each_online_member(c, ca)
                bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx)->last_mount = cpu_to_le64(now);
 
+       struct bch_sb_field_ext *ext =
+               bch2_sb_field_get_minsize(&c->disk_sb, ext, sizeof(*ext) / sizeof(u64));
        mutex_unlock(&c->sb_lock);
 
+       if (!ext) {
+               bch_err(c, "insufficient space in superblock for sb_field_ext");
+               ret = -BCH_ERR_ENOSPC_sb;
+               goto err;
+       }
+
        for_each_rw_member(c, ca)
                bch2_dev_allocator_add(c, ca);
        bch2_recalc_capacity(c);
index 216fadf16928b9a73eb47da96a8a7b409657e8fe..92c6ad75e702ab5680b45b7964e522c9b9012525 100644 (file)
@@ -707,149 +707,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
        }
 }
 
-static int alignment_ok(const void *base, size_t align)
-{
-       return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
-               ((unsigned long)base & (align - 1)) == 0;
-}
-
-static void u32_swap(void *a, void *b, size_t size)
-{
-       u32 t = *(u32 *)a;
-       *(u32 *)a = *(u32 *)b;
-       *(u32 *)b = t;
-}
-
-static void u64_swap(void *a, void *b, size_t size)
-{
-       u64 t = *(u64 *)a;
-       *(u64 *)a = *(u64 *)b;
-       *(u64 *)b = t;
-}
-
-static void generic_swap(void *a, void *b, size_t size)
-{
-       char t;
-
-       do {
-               t = *(char *)a;
-               *(char *)a++ = *(char *)b;
-               *(char *)b++ = t;
-       } while (--size > 0);
-}
-
-static inline int do_cmp(void *base, size_t n, size_t size,
-                        int (*cmp_func)(const void *, const void *, size_t),
-                        size_t l, size_t r)
-{
-       return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
-                       base + inorder_to_eytzinger0(r, n) * size,
-                       size);
-}
-
-static inline void do_swap(void *base, size_t n, size_t size,
-                          void (*swap_func)(void *, void *, size_t),
-                          size_t l, size_t r)
-{
-       swap_func(base + inorder_to_eytzinger0(l, n) * size,
-                 base + inorder_to_eytzinger0(r, n) * size,
-                 size);
-}
-
-void eytzinger0_sort(void *base, size_t n, size_t size,
-                    int (*cmp_func)(const void *, const void *, size_t),
-                    void (*swap_func)(void *, void *, size_t))
-{
-       int i, c, r;
-
-       if (!swap_func) {
-               if (size == 4 && alignment_ok(base, 4))
-                       swap_func = u32_swap;
-               else if (size == 8 && alignment_ok(base, 8))
-                       swap_func = u64_swap;
-               else
-                       swap_func = generic_swap;
-       }
-
-       /* heapify */
-       for (i = n / 2 - 1; i >= 0; --i) {
-               for (r = i; r * 2 + 1 < n; r = c) {
-                       c = r * 2 + 1;
-
-                       if (c + 1 < n &&
-                           do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-                               c++;
-
-                       if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-                               break;
-
-                       do_swap(base, n, size, swap_func, r, c);
-               }
-       }
-
-       /* sort */
-       for (i = n - 1; i > 0; --i) {
-               do_swap(base, n, size, swap_func, 0, i);
-
-               for (r = 0; r * 2 + 1 < i; r = c) {
-                       c = r * 2 + 1;
-
-                       if (c + 1 < i &&
-                           do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
-                               c++;
-
-                       if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
-                               break;
-
-                       do_swap(base, n, size, swap_func, r, c);
-               }
-       }
-}
-
-void sort_cmp_size(void *base, size_t num, size_t size,
-         int (*cmp_func)(const void *, const void *, size_t),
-         void (*swap_func)(void *, void *, size_t size))
-{
-       /* pre-scale counters for performance */
-       int i = (num/2 - 1) * size, n = num * size, c, r;
-
-       if (!swap_func) {
-               if (size == 4 && alignment_ok(base, 4))
-                       swap_func = u32_swap;
-               else if (size == 8 && alignment_ok(base, 8))
-                       swap_func = u64_swap;
-               else
-                       swap_func = generic_swap;
-       }
-
-       /* heapify */
-       for ( ; i >= 0; i -= size) {
-               for (r = i; r * 2 + size < n; r  = c) {
-                       c = r * 2 + size;
-                       if (c < n - size &&
-                           cmp_func(base + c, base + c + size, size) < 0)
-                               c += size;
-                       if (cmp_func(base + r, base + c, size) >= 0)
-                               break;
-                       swap_func(base + r, base + c, size);
-               }
-       }
-
-       /* sort */
-       for (i = n - size; i > 0; i -= size) {
-               swap_func(base, base + i, size);
-               for (r = 0; r * 2 + size < i; r = c) {
-                       c = r * 2 + size;
-                       if (c < i - size &&
-                           cmp_func(base + c, base + c + size, size) < 0)
-                               c += size;
-                       if (cmp_func(base + r, base + c, size) >= 0)
-                               break;
-                       swap_func(base + r, base + c, size);
-               }
-       }
-}
-
 #if 0
 void eytzinger1_test(void)
 {
index 175aee3074c7d539d40e7ec3ffc072a0e3d2d388..b7e7c29278fc052a90fe7c029e8fd0626c48ddc5 100644 (file)
@@ -631,10 +631,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
        memset(s + bytes, c, rem);
 }
 
-void sort_cmp_size(void *base, size_t num, size_t size,
-         int (*cmp_func)(const void *, const void *, size_t),
-         void (*swap_func)(void *, void *, size_t));
-
 /* just the memmove, doesn't update @_nr */
 #define __array_insert_item(_array, _nr, _pos)                         \
        memmove(&(_array)[(_pos) + 1],                                  \
@@ -797,4 +793,14 @@ static inline void __set_bit_le64(size_t bit, __le64 *addr)
        addr[bit / 64] |= cpu_to_le64(BIT_ULL(bit % 64));
 }
 
+static inline void __clear_bit_le64(size_t bit, __le64 *addr)
+{
+       addr[bit / 64] &= !cpu_to_le64(BIT_ULL(bit % 64));
+}
+
+static inline bool test_bit_le64(size_t bit, __le64 *addr)
+{
+       return (addr[bit / 64] & cpu_to_le64(BIT_ULL(bit % 64))) != 0;
+}
+
 #endif /* _BCACHEFS_UTIL_H */
index 39e75131fd5aa01d732f703cb1f421a3696bffd6..9901057a15ba79a110c8a90423bc7707102590d8 100644 (file)
@@ -495,7 +495,7 @@ static void cramfs_kill_sb(struct super_block *sb)
                sb->s_mtd = NULL;
        } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
                sync_blockdev(sb->s_bdev);
-               fput(sb->s_bdev_file);
+               bdev_fput(sb->s_bdev_file);
        }
        kfree(sbi);
 }
index cfb8449c731f9ac53fb3add808e13493175508c4..044135796f2b6ebe86e56b69f57501e7567d761b 100644 (file)
@@ -5668,7 +5668,7 @@ failed_mount:
        brelse(sbi->s_sbh);
        if (sbi->s_journal_bdev_file) {
                invalidate_bdev(file_bdev(sbi->s_journal_bdev_file));
-               fput(sbi->s_journal_bdev_file);
+               bdev_fput(sbi->s_journal_bdev_file);
        }
 out_fail:
        invalidate_bdev(sb->s_bdev);
@@ -5913,7 +5913,7 @@ static struct file *ext4_get_journal_blkdev(struct super_block *sb,
 out_bh:
        brelse(bh);
 out_bdev:
-       fput(bdev_file);
+       bdev_fput(bdev_file);
        return ERR_PTR(errno);
 }
 
@@ -5952,7 +5952,7 @@ static journal_t *ext4_open_dev_journal(struct super_block *sb,
 out_journal:
        jbd2_journal_destroy(journal);
 out_bdev:
-       fput(bdev_file);
+       bdev_fput(bdev_file);
        return ERR_PTR(errno);
 }
 
@@ -7327,7 +7327,7 @@ static void ext4_kill_sb(struct super_block *sb)
        kill_block_super(sb);
 
        if (bdev_file)
-               fput(bdev_file);
+               bdev_fput(bdev_file);
 }
 
 static struct file_system_type ext4_fs_type = {
index a6867f26f141836dcd4a4f0136dd67a9de6c3c74..a4bc26dfdb1af5973783d2817bf2deed889f3c33 100644 (file)
@@ -1558,7 +1558,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
 
        for (i = 0; i < sbi->s_ndevs; i++) {
                if (i > 0)
-                       fput(FDEV(i).bdev_file);
+                       bdev_fput(FDEV(i).bdev_file);
 #ifdef CONFIG_BLK_DEV_ZONED
                kvfree(FDEV(i).blkz_seq);
 #endif
index 73389c68e25170c81d6f84483f09b43216ba4b52..9609349e92e5e1ba422369fa29a2f6345f7fe908 100644 (file)
@@ -1141,7 +1141,7 @@ journal_found:
        lbmLogShutdown(log);
 
       close:           /* close external log device */
-       fput(bdev_file);
+       bdev_fput(bdev_file);
 
       free:            /* free log descriptor */
        mutex_unlock(&jfs_log_mutex);
@@ -1485,7 +1485,7 @@ int lmLogClose(struct super_block *sb)
        bdev_file = log->bdev_file;
        rc = lmLogShutdown(log);
 
-       fput(bdev_file);
+       bdev_fput(bdev_file);
 
        kfree(log);
 
index ceb9ddf8dfdd4e3d8b711ab655bc928eeb3b3e50..c5b2a25be7d048b613a11ef77b6f2f37ec1d0142 100644 (file)
@@ -4050,6 +4050,8 @@ retry:
                case 0: case S_IFREG:
                        error = vfs_create(idmap, path.dentry->d_inode,
                                           dentry, mode, true);
+                       if (!error)
+                               security_path_post_mknod(idmap, dentry);
                        break;
                case S_IFCHR: case S_IFBLK:
                        error = vfs_mknod(idmap, path.dentry->d_inode,
@@ -4060,11 +4062,6 @@ retry:
                                          dentry, mode, 0);
                        break;
        }
-
-       if (error)
-               goto out2;
-
-       security_path_post_mknod(idmap, dentry);
 out2:
        done_path_create(&path, dentry);
        if (retry_estale(error, lookup_flags)) {
index 6474529c42530628fd3969573fb175283f4f51e8..e539ccd39e1ee74cd8bdfd35d29f826be6f514e1 100644 (file)
@@ -2589,7 +2589,7 @@ static void journal_list_init(struct super_block *sb)
 static void release_journal_dev(struct reiserfs_journal *journal)
 {
        if (journal->j_bdev_file) {
-               fput(journal->j_bdev_file);
+               bdev_fput(journal->j_bdev_file);
                journal->j_bdev_file = NULL;
        }
 }
index 2be227532f399788de82a03e55970d33c67dc695..2cbb924620747f68d04ac53783c3b0f21c5ea0ab 100644 (file)
@@ -594,7 +594,7 @@ static void romfs_kill_sb(struct super_block *sb)
 #ifdef CONFIG_ROMFS_ON_BLOCK
        if (sb->s_bdev) {
                sync_blockdev(sb->s_bdev);
-               fput(sb->s_bdev_file);
+               bdev_fput(sb->s_bdev_file);
        }
 #endif
 }
index 8ca8a45c4c621c5dabf56664bec27e5df0db8356..686b321c5a8bb5f0a1189023a3311e85aaf84c9e 100644 (file)
@@ -167,7 +167,8 @@ struct ksmbd_share_config_response {
        __u16   force_uid;
        __u16   force_gid;
        __s8    share_name[KSMBD_REQ_MAX_SHARE_NAME];
-       __u32   reserved[112];          /* Reserved room */
+       __u32   reserved[111];          /* Reserved room */
+       __u32   payload_sz;
        __u32   veto_list_sz;
        __s8    ____payload[];
 };
index 328a412259dc1b935eb2ce3eee2977d39155157b..a2f0a2edceb8ae49852dde1b40cf7594df6084d9 100644 (file)
@@ -158,7 +158,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um,
        share->name = kstrdup(name, GFP_KERNEL);
 
        if (!test_share_config_flag(share, KSMBD_SHARE_FLAG_PIPE)) {
-               share->path = kstrdup(ksmbd_share_config_path(resp),
+               int path_len = PATH_MAX;
+
+               if (resp->payload_sz)
+                       path_len = resp->payload_sz - resp->veto_list_sz;
+
+               share->path = kstrndup(ksmbd_share_config_path(resp), path_len,
                                      GFP_KERNEL);
                if (share->path)
                        share->path_sz = strlen(share->path);
index a45f7dca482e01897720cd69d90291cbaf3ff388..606aa3c5189a28de2e49e602e2c08362c91f46b7 100644 (file)
@@ -228,6 +228,11 @@ void init_smb3_0_server(struct ksmbd_conn *conn)
            conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
                conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
 
+       if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+           (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+            conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+               conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
                conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
 }
@@ -278,11 +283,6 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
                conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING |
                        SMB2_GLOBAL_CAP_DIRECTORY_LEASING;
 
-       if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
-           (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
-            conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
-               conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
-
        if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
                conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
 
index d478fa0c57abdbc7b8478624edf5133e202c85bf..5723bbf372d7cc93c9e1b2dbdd5082c2824f85f8 100644 (file)
@@ -5857,8 +5857,9 @@ static int smb2_rename(struct ksmbd_work *work,
        if (!file_info->ReplaceIfExists)
                flags = RENAME_NOREPLACE;
 
-       smb_break_all_levII_oplock(work, fp, 0);
        rc = ksmbd_vfs_rename(work, &fp->filp->f_path, new_name, flags);
+       if (!rc)
+               smb_break_all_levII_oplock(work, fp, 0);
 out:
        kfree(new_name);
        return rc;
index f29bb03f0dc47bfcb0fe3fc5c5acff16d5a314a8..8752ac82c557bf92985bd4d87a3e37f4cd4a60dc 100644 (file)
@@ -65,6 +65,7 @@ struct ipc_msg_table_entry {
        struct hlist_node       ipc_table_hlist;
 
        void                    *response;
+       unsigned int            msg_sz;
 };
 
 static struct delayed_work ipc_timer_work;
@@ -275,6 +276,7 @@ static int handle_response(int type, void *payload, size_t sz)
                }
 
                memcpy(entry->response, payload, sz);
+               entry->msg_sz = sz;
                wake_up_interruptible(&entry->wait);
                ret = 0;
                break;
@@ -453,6 +455,34 @@ out:
        return ret;
 }
 
+static int ipc_validate_msg(struct ipc_msg_table_entry *entry)
+{
+       unsigned int msg_sz = entry->msg_sz;
+
+       if (entry->type == KSMBD_EVENT_RPC_REQUEST) {
+               struct ksmbd_rpc_command *resp = entry->response;
+
+               msg_sz = sizeof(struct ksmbd_rpc_command) + resp->payload_sz;
+       } else if (entry->type == KSMBD_EVENT_SPNEGO_AUTHEN_REQUEST) {
+               struct ksmbd_spnego_authen_response *resp = entry->response;
+
+               msg_sz = sizeof(struct ksmbd_spnego_authen_response) +
+                               resp->session_key_len + resp->spnego_blob_len;
+       } else if (entry->type == KSMBD_EVENT_SHARE_CONFIG_REQUEST) {
+               struct ksmbd_share_config_response *resp = entry->response;
+
+               if (resp->payload_sz) {
+                       if (resp->payload_sz < resp->veto_list_sz)
+                               return -EINVAL;
+
+                       msg_sz = sizeof(struct ksmbd_share_config_response) +
+                                       resp->payload_sz;
+               }
+       }
+
+       return entry->msg_sz != msg_sz ? -EINVAL : 0;
+}
+
 static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle)
 {
        struct ipc_msg_table_entry entry;
@@ -477,6 +507,13 @@ static void *ipc_msg_send_request(struct ksmbd_ipc_msg *msg, unsigned int handle
        ret = wait_event_interruptible_timeout(entry.wait,
                                               entry.response != NULL,
                                               IPC_WAIT_TIMEOUT);
+       if (entry.response) {
+               ret = ipc_validate_msg(&entry);
+               if (ret) {
+                       kvfree(entry.response);
+                       entry.response = NULL;
+               }
+       }
 out:
        down_write(&ipc_msg_table_lock);
        hash_del(&entry.ipc_table_hlist);
index 71d9779c42b10aca8bd4e0b7b667fc62386e2305..69ce6c600968479bd6832a6705352eb2d88427c1 100644 (file)
@@ -1515,29 +1515,11 @@ static int fs_bdev_thaw(struct block_device *bdev)
        return error;
 }
 
-static void fs_bdev_super_get(void *data)
-{
-       struct super_block *sb = data;
-
-       spin_lock(&sb_lock);
-       sb->s_count++;
-       spin_unlock(&sb_lock);
-}
-
-static void fs_bdev_super_put(void *data)
-{
-       struct super_block *sb = data;
-
-       put_super(sb);
-}
-
 const struct blk_holder_ops fs_holder_ops = {
        .mark_dead              = fs_bdev_mark_dead,
        .sync                   = fs_bdev_sync,
        .freeze                 = fs_bdev_freeze,
        .thaw                   = fs_bdev_thaw,
-       .get_holder             = fs_bdev_super_get,
-       .put_holder             = fs_bdev_super_put,
 };
 EXPORT_SYMBOL_GPL(fs_holder_ops);
 
@@ -1562,7 +1544,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
         * writable from userspace even for a read-only block device.
         */
        if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
-               fput(bdev_file);
+               bdev_fput(bdev_file);
                return -EACCES;
        }
 
@@ -1573,7 +1555,7 @@ int setup_bdev_super(struct super_block *sb, int sb_flags,
        if (atomic_read(&bdev->bd_fsfreeze_count) > 0) {
                if (fc)
                        warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
-               fput(bdev_file);
+               bdev_fput(bdev_file);
                return -EBUSY;
        }
        spin_lock(&sb_lock);
@@ -1693,7 +1675,7 @@ void kill_block_super(struct super_block *sb)
        generic_shutdown_super(sb);
        if (bdev) {
                sync_blockdev(bdev);
-               fput(sb->s_bdev_file);
+               bdev_fput(sb->s_bdev_file);
        }
 }
 
index 2307f8037efc3d3da4b6150fae4c42e125886005..118dedef8ebe8d4c8825ee1fff4e5b0a9ad9ff62 100644 (file)
@@ -218,6 +218,7 @@ const struct file_operations vboxsf_reg_fops = {
        .release = vboxsf_file_release,
        .fsync = noop_fsync,
        .splice_read = filemap_splice_read,
+       .setlease = simple_nosetlease,
 };
 
 const struct inode_operations vboxsf_reg_iops = {
index cabe8ac4fefc5d02de204d9695a983f7df3fb127..ffb1d565da398113461dc5e6481c553242656507 100644 (file)
@@ -151,11 +151,11 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
                if (!sbi->nls) {
                        vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
                        err = -EINVAL;
-                       goto fail_free;
+                       goto fail_destroy_idr;
                }
        }
 
-       sbi->bdi_id = ida_simple_get(&vboxsf_bdi_ida, 0, 0, GFP_KERNEL);
+       sbi->bdi_id = ida_alloc(&vboxsf_bdi_ida, GFP_KERNEL);
        if (sbi->bdi_id < 0) {
                err = sbi->bdi_id;
                goto fail_free;
@@ -221,9 +221,10 @@ fail_unmap:
        vboxsf_unmap_folder(sbi->root);
 fail_free:
        if (sbi->bdi_id >= 0)
-               ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+               ida_free(&vboxsf_bdi_ida, sbi->bdi_id);
        if (sbi->nls)
                unload_nls(sbi->nls);
+fail_destroy_idr:
        idr_destroy(&sbi->ino_idr);
        kfree(sbi);
        return err;
@@ -268,7 +269,7 @@ static void vboxsf_put_super(struct super_block *sb)
 
        vboxsf_unmap_folder(sbi->root);
        if (sbi->bdi_id >= 0)
-               ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
+               ida_free(&vboxsf_bdi_ida, sbi->bdi_id);
        if (sbi->nls)
                unload_nls(sbi->nls);
 
index 72ac9320e6a35f57a2f0b219ad8cc6496c58c991..9515bbf0b54ce8c29314436a5390160b0ca0adef 100644 (file)
@@ -440,7 +440,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
 {
        const char *in;
        char *out;
-       size_t out_len;
        size_t out_bound_len;
        size_t in_bound_len;
 
@@ -448,7 +447,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
        in_bound_len = utf8_len;
 
        out = name;
-       out_len = 0;
        /* Reserve space for terminating 0 */
        out_bound_len = name_bound_len - 1;
 
@@ -469,7 +467,6 @@ int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
 
                out += nb;
                out_bound_len -= nb;
-               out_len += nb;
        }
 
        *out = 0;
index 1a18c381127e2183169eaa8280aa620d66340a71..f0fa02264edaaeef2d23d1101d1953d6454e8832 100644 (file)
@@ -2030,7 +2030,7 @@ xfs_free_buftarg(
        fs_put_dax(btp->bt_daxdev, btp->bt_mount);
        /* the main block device is closed by kill_block_super */
        if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
-               fput(btp->bt_bdev_file);
+               bdev_fput(btp->bt_bdev_file);
        kfree(btp);
 }
 
index c21f10ab0f5dbef4051b6bef01eb64c77247e056..bce020374c5eba5255a98d71176d7198024603d2 100644 (file)
@@ -485,7 +485,7 @@ xfs_open_devices(
                mp->m_logdev_targp = mp->m_ddev_targp;
                /* Handle won't be used, drop it */
                if (logdev_file)
-                       fput(logdev_file);
+                       bdev_fput(logdev_file);
        }
 
        return 0;
@@ -497,10 +497,10 @@ xfs_open_devices(
        xfs_free_buftarg(mp->m_ddev_targp);
  out_close_rtdev:
         if (rtdev_file)
-               fput(rtdev_file);
+               bdev_fput(rtdev_file);
  out_close_logdev:
        if (logdev_file)
-               fput(logdev_file);
+               bdev_fput(logdev_file);
        return error;
 }
 
index eb4c369a79eb31b705aa78ec28437d3de69b97b7..35d4ca4f6122c7d50e2541dbc56b1b25c93d69d6 100644 (file)
@@ -86,7 +86,7 @@ void kvm_vcpu_pmu_resync_el0(void);
  */
 #define kvm_pmu_update_vcpu_events(vcpu)                               \
        do {                                                            \
-               if (!has_vhe() && kvm_vcpu_has_pmu(vcpu))               \
+               if (!has_vhe() && kvm_arm_support_pmu_v3())             \
                        vcpu->arch.pmu.events = *kvm_get_pmu_events();  \
        } while (0)
 
index c3e8f7cf96be9e1c10169d2e7afe31696082eb8f..172c918799995f8ce6ed285bc5a8ea3341ddb6ed 100644 (file)
@@ -1505,16 +1505,6 @@ struct blk_holder_ops {
         * Thaw the file system mounted on the block device.
         */
        int (*thaw)(struct block_device *bdev);
-
-       /*
-        * If needed, get a reference to the holder.
-        */
-       void (*get_holder)(void *holder);
-
-       /*
-        * Release the holder.
-        */
-       void (*put_holder)(void *holder);
 };
 
 /*
@@ -1585,6 +1575,7 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
 
 int bdev_freeze(struct block_device *bdev);
 int bdev_thaw(struct block_device *bdev);
+void bdev_fput(struct file *bdev_file);
 
 struct io_comp_batch {
        struct request *req_list;
index 4f20f62f9d63da87800af4ac21cbc7c92dae5fb9..890e152d553ea3cc8d16b2a058f2dfd657aba1f8 100644 (file)
@@ -1574,12 +1574,26 @@ struct bpf_link {
        enum bpf_link_type type;
        const struct bpf_link_ops *ops;
        struct bpf_prog *prog;
-       struct work_struct work;
+       /* rcu is used before freeing, work can be used to schedule that
+        * RCU-based freeing before that, so they never overlap
+        */
+       union {
+               struct rcu_head rcu;
+               struct work_struct work;
+       };
 };
 
 struct bpf_link_ops {
        void (*release)(struct bpf_link *link);
+       /* deallocate link resources callback, called without RCU grace period
+        * waiting
+        */
        void (*dealloc)(struct bpf_link *link);
+       /* deallocate link resources callback, called after RCU grace period;
+        * if underlying BPF program is sleepable we go through tasks trace
+        * RCU GP and then "classic" RCU GP
+        */
+       void (*dealloc_deferred)(struct bpf_link *link);
        int (*detach)(struct bpf_link *link);
        int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
                           struct bpf_prog *old_prog);
index 00fc429b0af0fb9bbab2382a9e347fdbac383981..8dfd53b52744a4dfffb8ccb350364972658f00eb 100644 (file)
@@ -121,6 +121,8 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_PWRITE           ((__force fmode_t)0x10)
 /* File is opened for execution with sys_execve / sys_uselib */
 #define FMODE_EXEC             ((__force fmode_t)0x20)
+/* File writes are restricted (block device specific) */
+#define FMODE_WRITE_RESTRICTED  ((__force fmode_t)0x40)
 /* 32bit hashes as llseek() offset (for directories) */
 #define FMODE_32BITHASH         ((__force fmode_t)0x200)
 /* 64bit hashes as llseek() offset (for directories) */
index 3748e82b627b7044508db66adbf77c54a8e3d612..17539d08966618bfe4c2f0b41f4893c8951b22fe 100644 (file)
@@ -150,6 +150,24 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
        }
 }
 
+DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
+#if IS_ENABLED(CONFIG_IPV6)
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+#endif
+
+static inline bool udp_encap_needed(void)
+{
+       if (static_branch_unlikely(&udp_encap_needed_key))
+               return true;
+
+#if IS_ENABLED(CONFIG_IPV6)
+       if (static_branch_unlikely(&udpv6_encap_needed_key))
+               return true;
+#endif
+
+       return false;
+}
+
 static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
 {
        if (!skb_is_gso(skb))
@@ -163,6 +181,16 @@ static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
            !udp_test_bit(ACCEPT_FRAGLIST, sk))
                return true;
 
+       /* GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
+        * land in a tunnel as the socket check in udp_gro_receive cannot be
+        * foolproof.
+        */
+       if (udp_encap_needed() &&
+           READ_ONCE(udp_sk(sk)->encap_rcv) &&
+           !(skb_shinfo(skb)->gso_type &
+             (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM)))
+               return true;
+
        return false;
 }
 
index 8701ca5f31eec353bb793ac44acef21445337cdf..5c12761cbc0e212e4f7a4c01a60c2bff9bc98027 100644 (file)
@@ -176,6 +176,15 @@ enum {
         */
        HCI_QUIRK_USE_BDADDR_PROPERTY,
 
+       /* When this quirk is set, the Bluetooth Device Address provided by
+        * the 'local-bd-address' fwnode property is incorrectly specified in
+        * big-endian order.
+        *
+        * This quirk can be set before hci_register_dev is called or
+        * during the hdev->setup vendor callback.
+        */
+       HCI_QUIRK_BDADDR_PROPERTY_BROKEN,
+
        /* When this quirk is set, the duplicate filtering during
         * scanning is based on Bluetooth devices addresses. To allow
         * RSSI based updates, restart scanning if needed.
index 76147feb0d10aed24d43a7d32a11407e81cb4e11..4eeedf14711b30b0a0829aecd71993c324fda2bb 100644 (file)
@@ -39,7 +39,6 @@ enum TRI_STATE {
 #define COMP_ENTRY_SIZE 64
 
 #define RX_BUFFERS_PER_QUEUE 512
-#define MANA_RX_DATA_ALIGN 64
 
 #define MAX_SEND_BUFFERS_PER_QUEUE 256
 
index a8bebac1e4b28dd3d0195894dc96e18f74184992..957295364a5e3c1aa3bc8a9108a7da02e7b6ee44 100644 (file)
@@ -56,6 +56,9 @@ struct hdac_ext_stream {
        u32 pphcldpl;
        u32 pphcldpu;
 
+       u32 pplcllpl;
+       u32 pplcllpu;
+
        bool decoupled:1;
        bool link_locked:1;
        bool link_prepared;
index 4038dd421150a3f03182be5d1f0a503812029c15..1dc59005d241fbe683fb98d62d86e8c04f3708eb 100644 (file)
@@ -15,7 +15,7 @@
 #ifndef __TAS2781_TLV_H__
 #define __TAS2781_TLV_H__
 
-static const DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
+static const __maybe_unused DECLARE_TLV_DB_SCALE(dvc_tlv, -10000, 100, 0);
 static const DECLARE_TLV_DB_SCALE(amp_vol_tlv, 1100, 50, 0);
 
 #endif
index ae2ff73bde7e79aa905f060f753bfc8261451972..c287925471f68ef989ffcd3022942397e9d1465c 100644 (file)
@@ -3024,17 +3024,46 @@ void bpf_link_inc(struct bpf_link *link)
        atomic64_inc(&link->refcnt);
 }
 
+static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
+{
+       struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
+
+       /* free bpf_link and its containing memory */
+       link->ops->dealloc_deferred(link);
+}
+
+static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
+{
+       if (rcu_trace_implies_rcu_gp())
+               bpf_link_defer_dealloc_rcu_gp(rcu);
+       else
+               call_rcu(rcu, bpf_link_defer_dealloc_rcu_gp);
+}
+
 /* bpf_link_free is guaranteed to be called from process context */
 static void bpf_link_free(struct bpf_link *link)
 {
+       bool sleepable = false;
+
        bpf_link_free_id(link->id);
        if (link->prog) {
+               sleepable = link->prog->sleepable;
                /* detach BPF program, clean up used resources */
                link->ops->release(link);
                bpf_prog_put(link->prog);
        }
-       /* free bpf_link and its containing memory */
-       link->ops->dealloc(link);
+       if (link->ops->dealloc_deferred) {
+               /* schedule BPF link deallocation; if underlying BPF program
+                * is sleepable, we need to first wait for RCU tasks trace
+                * sync, then go through "classic" RCU grace period
+                */
+               if (sleepable)
+                       call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
+               else
+                       call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
+       }
+       if (link->ops->dealloc)
+               link->ops->dealloc(link);
 }
 
 static void bpf_link_put_deferred(struct work_struct *work)
@@ -3544,7 +3573,7 @@ static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
 
 static const struct bpf_link_ops bpf_raw_tp_link_lops = {
        .release = bpf_raw_tp_link_release,
-       .dealloc = bpf_raw_tp_link_dealloc,
+       .dealloc_deferred = bpf_raw_tp_link_dealloc,
        .show_fdinfo = bpf_raw_tp_link_show_fdinfo,
        .fill_link_info = bpf_raw_tp_link_fill_link_info,
 };
index 353985b2b6a279f0d5a1784d753b10002cfb216e..98188379d5c77d79d3a5e764659ed4426f931d14 100644 (file)
@@ -18379,15 +18379,18 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
                                }
                                if (!env->prog->jit_requested) {
                                        verbose(env, "JIT is required to use arena\n");
+                                       fdput(f);
                                        return -EOPNOTSUPP;
                                }
                                if (!bpf_jit_supports_arena()) {
                                        verbose(env, "JIT doesn't support arena\n");
+                                       fdput(f);
                                        return -EOPNOTSUPP;
                                }
                                env->prog->aux->arena = (void *)map;
                                if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
                                        verbose(env, "arena's user address must be set via map_extra or mmap()\n");
+                                       fdput(f);
                                        return -EINVAL;
                                }
                        }
index 0a5c4efc73c3674fa225757c6f4ccc921f758b57..9dc605f08a23141866ae2340e74d9a1f9212e658 100644 (file)
@@ -2728,7 +2728,7 @@ static int bpf_kprobe_multi_link_fill_link_info(const struct bpf_link *link,
 
 static const struct bpf_link_ops bpf_kprobe_multi_link_lops = {
        .release = bpf_kprobe_multi_link_release,
-       .dealloc = bpf_kprobe_multi_link_dealloc,
+       .dealloc_deferred = bpf_kprobe_multi_link_dealloc,
        .fill_link_info = bpf_kprobe_multi_link_fill_link_info,
 };
 
@@ -3157,6 +3157,9 @@ static void bpf_uprobe_multi_link_release(struct bpf_link *link)
 
        umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
        bpf_uprobe_unregister(&umulti_link->path, umulti_link->uprobes, umulti_link->cnt);
+       if (umulti_link->task)
+               put_task_struct(umulti_link->task);
+       path_put(&umulti_link->path);
 }
 
 static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
@@ -3164,9 +3167,6 @@ static void bpf_uprobe_multi_link_dealloc(struct bpf_link *link)
        struct bpf_uprobe_multi_link *umulti_link;
 
        umulti_link = container_of(link, struct bpf_uprobe_multi_link, link);
-       if (umulti_link->task)
-               put_task_struct(umulti_link->task);
-       path_put(&umulti_link->path);
        kvfree(umulti_link->uprobes);
        kfree(umulti_link);
 }
@@ -3242,7 +3242,7 @@ static int bpf_uprobe_multi_link_fill_link_info(const struct bpf_link *link,
 
 static const struct bpf_link_ops bpf_uprobe_multi_link_lops = {
        .release = bpf_uprobe_multi_link_release,
-       .dealloc = bpf_uprobe_multi_link_dealloc,
+       .dealloc_deferred = bpf_uprobe_multi_link_dealloc,
        .fill_link_info = bpf_uprobe_multi_link_fill_link_info,
 };
 
index e265a0ca6bddd40711235c8d7560a6f409a51241..f7e90b4769bba92ef8187b0a96cb310f0c13d5f8 100644 (file)
@@ -1583,7 +1583,7 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
                received = rsize;
        }
 
-       p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
+       p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received);
 
        if (non_zc) {
                int n = copy_to_iter(dataptr, received, to);
@@ -1609,9 +1609,6 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
        int total = 0;
        *err = 0;
 
-       p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %zd\n",
-                fid->fid, offset, iov_iter_count(from));
-
        while (iov_iter_count(from)) {
                int count = iov_iter_count(from);
                int rsize = fid->iounit;
@@ -1623,6 +1620,9 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
                if (count < rsize)
                        rsize = count;
 
+               p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n",
+                        fid->fid, offset, rsize, count);
+
                /* Don't bother zerocopy for small IO (< 1024) */
                if (clnt->trans_mod->zc_request && rsize > 1024) {
                        req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0,
@@ -1650,7 +1650,7 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
                        written = rsize;
                }
 
-               p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);
+               p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);
 
                p9_req_put(clnt, req);
                iov_iter_revert(from, count - written - iov_iter_count(from));
index 1a3948b8c493eda3aca297896bd8adf7a63d443a..196060dc6138af10e99ad04a76ee36a11f770c65 100644 (file)
@@ -95,7 +95,6 @@ struct p9_poll_wait {
  * @unsent_req_list: accounting for requests that haven't been sent
  * @rreq: read request
  * @wreq: write request
- * @req: current request being processed (if any)
  * @tmp_buf: temporary buffer to read in header
  * @rc: temporary fcall for reading current frame
  * @wpos: write position for current frame
index c5462486dbca10c460e2bd4c34d81162a1e1c9d6..282ec581c0720105b0411d9596c7ac12ff3bf115 100644 (file)
@@ -105,7 +105,7 @@ void ax25_dev_device_down(struct net_device *dev)
        spin_lock_bh(&ax25_dev_lock);
 
 #ifdef CONFIG_AX25_DAMA_SLAVE
-       ax25_ds_del_timer(ax25_dev);
+       timer_shutdown_sync(&ax25_dev->dama.slave_timer);
 #endif
 
        /*
index 1690ae57a09dbbdb9041d589a5906cbde2ee9f27..a7028d38c1f5cc756aed90c3859638284996011f 100644 (file)
@@ -2874,7 +2874,7 @@ static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err)
        cancel_delayed_work_sync(&hdev->ncmd_timer);
        atomic_set(&hdev->cmd_cnt, 1);
 
-       hci_cmd_sync_cancel_sync(hdev, -err);
+       hci_cmd_sync_cancel_sync(hdev, err);
 }
 
 /* Suspend HCI device */
@@ -2894,7 +2894,7 @@ int hci_suspend_dev(struct hci_dev *hdev)
                return 0;
 
        /* Cancel potentially blocking sync operation before suspend */
-       hci_cancel_cmd_sync(hdev, -EHOSTDOWN);
+       hci_cancel_cmd_sync(hdev, EHOSTDOWN);
 
        hci_req_sync_lock(hdev);
        ret = hci_suspend_sync(hdev);
@@ -4210,7 +4210,7 @@ static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
 
        err = hci_send_frame(hdev, skb);
        if (err < 0) {
-               hci_cmd_sync_cancel_sync(hdev, err);
+               hci_cmd_sync_cancel_sync(hdev, -err);
                return;
        }
 
index 233453807b50992358bec11dfddfe670d953ff7a..ce3ff2fa72e58a21475a4ecdbcd33ed58d86b96b 100644 (file)
@@ -218,10 +218,12 @@ static int conn_info_min_age_set(void *data, u64 val)
 {
        struct hci_dev *hdev = data;
 
-       if (val == 0 || val > hdev->conn_info_max_age)
+       hci_dev_lock(hdev);
+       if (val == 0 || val > hdev->conn_info_max_age) {
+               hci_dev_unlock(hdev);
                return -EINVAL;
+       }
 
-       hci_dev_lock(hdev);
        hdev->conn_info_min_age = val;
        hci_dev_unlock(hdev);
 
@@ -246,10 +248,12 @@ static int conn_info_max_age_set(void *data, u64 val)
 {
        struct hci_dev *hdev = data;
 
-       if (val == 0 || val < hdev->conn_info_min_age)
+       hci_dev_lock(hdev);
+       if (val == 0 || val < hdev->conn_info_min_age) {
+               hci_dev_unlock(hdev);
                return -EINVAL;
+       }
 
-       hci_dev_lock(hdev);
        hdev->conn_info_max_age = val;
        hci_dev_unlock(hdev);
 
@@ -567,10 +571,12 @@ static int sniff_min_interval_set(void *data, u64 val)
 {
        struct hci_dev *hdev = data;
 
-       if (val == 0 || val % 2 || val > hdev->sniff_max_interval)
+       hci_dev_lock(hdev);
+       if (val == 0 || val % 2 || val > hdev->sniff_max_interval) {
+               hci_dev_unlock(hdev);
                return -EINVAL;
+       }
 
-       hci_dev_lock(hdev);
        hdev->sniff_min_interval = val;
        hci_dev_unlock(hdev);
 
@@ -595,10 +601,12 @@ static int sniff_max_interval_set(void *data, u64 val)
 {
        struct hci_dev *hdev = data;
 
-       if (val == 0 || val % 2 || val < hdev->sniff_min_interval)
+       hci_dev_lock(hdev);
+       if (val == 0 || val % 2 || val < hdev->sniff_min_interval) {
+               hci_dev_unlock(hdev);
                return -EINVAL;
+       }
 
-       hci_dev_lock(hdev);
        hdev->sniff_max_interval = val;
        hci_dev_unlock(hdev);
 
@@ -850,10 +858,12 @@ static int conn_min_interval_set(void *data, u64 val)
 {
        struct hci_dev *hdev = data;
 
-       if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval)
+       hci_dev_lock(hdev);
+       if (val < 0x0006 || val > 0x0c80 || val > hdev->le_conn_max_interval) {
+               hci_dev_unlock(hdev);
                return -EINVAL;
+       }
 
-       hci_dev_lock(hdev);
        hdev->le_conn_min_interval = val;
        hci_dev_unlock(hdev);
 
@@ -878,10 +888,12 @@ static int conn_max_interval_set(void *data, u64 val)
 {
        struct hci_dev *hdev = data;
 
-       if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval)
+       hci_dev_lock(hdev);
+       if (val < 0x0006 || val > 0x0c80 || val < hdev->le_conn_min_interval) {
+               hci_dev_unlock(hdev);
                return -EINVAL;
+       }
 
-       hci_dev_lock(hdev);
        hdev->le_conn_max_interval = val;
        hci_dev_unlock(hdev);
 
@@ -990,10 +1002,12 @@ static int adv_min_interval_set(void *data, u64 val)
 {
        struct hci_dev *hdev = data;
 
-       if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval)
+       hci_dev_lock(hdev);
+       if (val < 0x0020 || val > 0x4000 || val > hdev->le_adv_max_interval) {
+               hci_dev_unlock(hdev);
                return -EINVAL;
+       }
 
-       hci_dev_lock(hdev);
        hdev->le_adv_min_interval = val;
        hci_dev_unlock(hdev);
 
@@ -1018,10 +1032,12 @@ static int adv_max_interval_set(void *data, u64 val)
 {
        struct hci_dev *hdev = data;
 
-       if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval)
+       hci_dev_lock(hdev);
+       if (val < 0x0020 || val > 0x4000 || val < hdev->le_adv_min_interval) {
+               hci_dev_unlock(hdev);
                return -EINVAL;
+       }
 
-       hci_dev_lock(hdev);
        hdev->le_adv_max_interval = val;
        hci_dev_unlock(hdev);
 
index 4ae2248240121c6c36040b2af182c1bcd3b19944..a8b8cfebe0180cce2fb661e8e5f21a79bf7a7656 100644 (file)
@@ -3208,6 +3208,31 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, void *data,
                if (test_bit(HCI_ENCRYPT, &hdev->flags))
                        set_bit(HCI_CONN_ENCRYPT, &conn->flags);
 
+               /* "Link key request" completed ahead of "connect request" completes */
+               if (ev->encr_mode == 1 && !test_bit(HCI_CONN_ENCRYPT, &conn->flags) &&
+                   ev->link_type == ACL_LINK) {
+                       struct link_key *key;
+                       struct hci_cp_read_enc_key_size cp;
+
+                       key = hci_find_link_key(hdev, &ev->bdaddr);
+                       if (key) {
+                               set_bit(HCI_CONN_ENCRYPT, &conn->flags);
+
+                               if (!(hdev->commands[20] & 0x10)) {
+                                       conn->enc_key_size = HCI_LINK_KEY_SIZE;
+                               } else {
+                                       cp.handle = cpu_to_le16(conn->handle);
+                                       if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE,
+                                                        sizeof(cp), &cp)) {
+                                               bt_dev_err(hdev, "sending read key size failed");
+                                               conn->enc_key_size = HCI_LINK_KEY_SIZE;
+                                       }
+                               }
+
+                               hci_encrypt_cfm(conn, ev->status);
+                       }
+               }
+
                /* Get remote features */
                if (conn->type == ACL_LINK) {
                        struct hci_cp_read_remote_features cp;
index f6b662369322b31b9838cdb13e8e9839f53120ac..8fe02921adf15d4b968be310415bf9383ae3d63d 100644 (file)
@@ -617,7 +617,10 @@ void hci_cmd_sync_cancel_sync(struct hci_dev *hdev, int err)
        bt_dev_dbg(hdev, "err 0x%2.2x", err);
 
        if (hdev->req_status == HCI_REQ_PEND) {
-               hdev->req_result = err;
+               /* req_result is __u32 so error must be positive to be properly
+                * propagated.
+                */
+               hdev->req_result = err < 0 ? -err : err;
                hdev->req_status = HCI_REQ_CANCELED;
 
                wake_up_interruptible(&hdev->req_wait_q);
@@ -3416,7 +3419,10 @@ static void hci_dev_get_bd_addr_from_property(struct hci_dev *hdev)
        if (ret < 0 || !bacmp(&ba, BDADDR_ANY))
                return;
 
-       bacpy(&hdev->public_addr, &ba);
+       if (test_bit(HCI_QUIRK_BDADDR_PROPERTY_BROKEN, &hdev->quirks))
+               baswap(&hdev->public_addr, &ba);
+       else
+               bacpy(&hdev->public_addr, &ba);
 }
 
 struct hci_init_stage {
index 99d82676f780ac49d01151fa9c585f44f9ea8ccc..cbd0e3586c3f61904efb4db7d9101d7770c852e7 100644 (file)
@@ -1111,6 +1111,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
        struct ebt_table_info *newinfo;
        struct ebt_replace tmp;
 
+       if (len < sizeof(tmp))
+               return -EINVAL;
        if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
                return -EFAULT;
 
@@ -1423,6 +1425,8 @@ static int update_counters(struct net *net, sockptr_t arg, unsigned int len)
 {
        struct ebt_replace hlp;
 
+       if (len < sizeof(hlp))
+               return -EINVAL;
        if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
                return -EFAULT;
 
@@ -2352,6 +2356,8 @@ static int compat_update_counters(struct net *net, sockptr_t arg,
 {
        struct compat_ebt_replace hlp;
 
+       if (len < sizeof(hlp))
+               return -EINVAL;
        if (copy_from_sockptr(&hlp, arg, sizeof(hlp)))
                return -EFAULT;
 
index 9a67003e49db87f3f92b6c6296b3e7a5ca9d9171..984ff8b9d0e1aa5646a7237a8cf0b0a21c2aa559 100644 (file)
@@ -429,7 +429,7 @@ EXPORT_PER_CPU_SYMBOL(softnet_data);
  * PP consumers must pay attention to run APIs in the appropriate context
  * (e.g. NAPI context).
  */
-static DEFINE_PER_CPU_ALIGNED(struct page_pool *, system_page_pool);
+static DEFINE_PER_CPU(struct page_pool *, system_page_pool);
 
 #ifdef CONFIG_LOCKDEP
 /*
index ee30d4f0c03876e78795397d1c495881a2c9e80f..83f35d99a682c21dae11683fec72074a898fbac2 100644 (file)
@@ -192,8 +192,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
        }
 
 merge:
-       /* sk owenrship - if any - completely transferred to the aggregated packet */
+       /* sk ownership - if any - completely transferred to the aggregated packet */
        skb->destructor = NULL;
+       skb->sk = NULL;
        delta_truesize = skb->truesize;
        if (offset > headlen) {
                unsigned int eat = offset - headlen;
index 27d733c0f65e16bf312eb959e98b3443aefbd812..8598466a3805784f58497d9607c5ace6f081cefb 100644 (file)
@@ -411,6 +411,9 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
        struct sock *sk;
        int err = 0;
 
+       if (irqs_disabled())
+               return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
        spin_lock_bh(&stab->lock);
        sk = *psk;
        if (!sk_test || sk_test == sk)
@@ -933,6 +936,9 @@ static long sock_hash_delete_elem(struct bpf_map *map, void *key)
        struct bpf_shtab_elem *elem;
        int ret = -ENOENT;
 
+       if (irqs_disabled())
+               return -EOPNOTSUPP; /* locks here are hardirq-unsafe */
+
        hash = sock_hash_bucket_hash(key, key_size);
        bucket = sock_hash_select_bucket(htab, hash);
 
index c98b5b71ad7c32aead8685db5a96d76cb673f0e4..e9d45133d6412e57d88414b0baffaec625d0d724 100644 (file)
@@ -132,30 +132,29 @@ static int hsr_dev_open(struct net_device *dev)
 {
        struct hsr_priv *hsr;
        struct hsr_port *port;
-       char designation;
+       const char *designation = NULL;
 
        hsr = netdev_priv(dev);
-       designation = '\0';
 
        hsr_for_each_port(hsr, port) {
                if (port->type == HSR_PT_MASTER)
                        continue;
                switch (port->type) {
                case HSR_PT_SLAVE_A:
-                       designation = 'A';
+                       designation = "Slave A";
                        break;
                case HSR_PT_SLAVE_B:
-                       designation = 'B';
+                       designation = "Slave B";
                        break;
                default:
-                       designation = '?';
+                       designation = "Unknown";
                }
                if (!is_slave_up(port->dev))
-                       netdev_warn(dev, "Slave %c (%s) is not up; please bring it up to get a fully working HSR network\n",
+                       netdev_warn(dev, "%s (%s) is not up; please bring it up to get a fully working HSR network\n",
                                    designation, port->dev->name);
        }
 
-       if (designation == '\0')
+       if (!designation)
                netdev_warn(dev, "No slave devices configured\n");
 
        return 0;
index c038e28e2f1e66bf10c7f67ffe073e6790b2d6ce..3b38610958ee4bc3d9296c9b5e5fce6ab5c97c2c 100644 (file)
@@ -203,8 +203,15 @@ static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
                                   kuid_t sk_uid, bool relax,
                                   bool reuseport_cb_ok, bool reuseport_ok)
 {
-       if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
-               return false;
+       if (ipv6_only_sock(sk2)) {
+               if (sk->sk_family == AF_INET)
+                       return false;
+
+#if IS_ENABLED(CONFIG_IPV6)
+               if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+                       return false;
+#endif
+       }
 
        return inet_bind_conflict(sk, sk2, sk_uid, relax,
                                  reuseport_cb_ok, reuseport_ok);
@@ -287,6 +294,7 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
        struct sock_reuseport *reuseport_cb;
        struct inet_bind_hashbucket *head2;
        struct inet_bind2_bucket *tb2;
+       bool conflict = false;
        bool reuseport_cb_ok;
 
        rcu_read_lock();
@@ -299,18 +307,20 @@ static bool inet_bhash2_addr_any_conflict(const struct sock *sk, int port, int l
 
        spin_lock(&head2->lock);
 
-       inet_bind_bucket_for_each(tb2, &head2->chain)
-               if (inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
-                       break;
+       inet_bind_bucket_for_each(tb2, &head2->chain) {
+               if (!inet_bind2_bucket_match_addr_any(tb2, net, port, l3mdev, sk))
+                       continue;
 
-       if (tb2 && inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
-                                       reuseport_ok)) {
-               spin_unlock(&head2->lock);
-               return true;
+               if (!inet_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok, reuseport_ok))
+                       continue;
+
+               conflict = true;
+               break;
        }
 
        spin_unlock(&head2->lock);
-       return false;
+
+       return conflict;
 }
 
 /*
index 7b16c211b904473cc5e350aafdefb86fbf1b3693..57ddcd8c62f67e493b74634a793592fcd259e04e 100644 (file)
@@ -280,8 +280,13 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
                                          tpi->flags | TUNNEL_NO_KEY,
                                          iph->saddr, iph->daddr, 0);
        } else {
+               if (unlikely(!pskb_may_pull(skb,
+                                           gre_hdr_len + sizeof(*ershdr))))
+                       return PACKET_REJECT;
+
                ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
                ver = ershdr->ver;
+               iph = ip_hdr(skb);
                tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
                                          tpi->flags | TUNNEL_KEY,
                                          iph->saddr, iph->daddr, tpi->key);
index 2407066b0fec1121d71561ecbad6f4f87ecdebbc..b150c9929b12e86219a55c77da480e0c538b3449 100644 (file)
@@ -956,6 +956,8 @@ static int do_replace(struct net *net, sockptr_t arg, unsigned int len)
        void *loc_cpu_entry;
        struct arpt_entry *iter;
 
+       if (len < sizeof(tmp))
+               return -EINVAL;
        if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
                return -EFAULT;
 
@@ -1254,6 +1256,8 @@ static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
        void *loc_cpu_entry;
        struct arpt_entry *iter;
 
+       if (len < sizeof(tmp))
+               return -EINVAL;
        if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
                return -EFAULT;
 
index 7da1df4997d057a4292927c2041687c2b39d4a01..487670759578168c5ff53bce6642898fc41936b3 100644 (file)
@@ -1108,6 +1108,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
        void *loc_cpu_entry;
        struct ipt_entry *iter;
 
+       if (len < sizeof(tmp))
+               return -EINVAL;
        if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
                return -EFAULT;
 
@@ -1492,6 +1494,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
        void *loc_cpu_entry;
        struct ipt_entry *iter;
 
+       if (len < sizeof(tmp))
+               return -EINVAL;
        if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
                return -EFAULT;
 
index 661d0e0d273f616ad82746b69b2c76d056633017..c02bf011d4a6f487b2c69e48e5032068eed3debc 100644 (file)
@@ -582,6 +582,13 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk,
 }
 
 DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key);
+EXPORT_SYMBOL(udp_encap_needed_key);
+
+#if IS_ENABLED(CONFIG_IPV6)
+DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+EXPORT_SYMBOL(udpv6_encap_needed_key);
+#endif
+
 void udp_encap_enable(void)
 {
        static_branch_inc(&udp_encap_needed_key);
index b9880743765c6c24c28bea095f16f0cf091664ce..3498dd1d0694dc3ddb984177d2ddffb7b8abd0b9 100644 (file)
@@ -449,8 +449,9 @@ static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
        NAPI_GRO_CB(p)->count++;
        p->data_len += skb->len;
 
-       /* sk owenrship - if any - completely transferred to the aggregated packet */
+       /* sk ownership - if any - completely transferred to the aggregated packet */
        skb->destructor = NULL;
+       skb->sk = NULL;
        p->truesize += skb->truesize;
        p->len += skb->len;
 
@@ -551,11 +552,19 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
        unsigned int off = skb_gro_offset(skb);
        int flush = 1;
 
-       /* we can do L4 aggregation only if the packet can't land in a tunnel
-        * otherwise we could corrupt the inner stream
+       /* We can do L4 aggregation only if the packet can't land in a tunnel
+        * otherwise we could corrupt the inner stream. Detecting such packets
+        * cannot be foolproof and the aggregation might still happen in some
+        * cases. Such packets should be caught in udp_unexpected_gso later.
         */
        NAPI_GRO_CB(skb)->is_flist = 0;
        if (!sk || !udp_sk(sk)->gro_receive) {
+               /* If the packet was locally encapsulated in a UDP tunnel that
+                * wasn't detected above, do not GRO.
+                */
+               if (skb->encapsulation)
+                       goto out;
+
                if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
                        NAPI_GRO_CB(skb)->is_flist = sk ? !udp_test_bit(GRO_ENABLED, sk) : 1;
 
@@ -719,13 +728,7 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
                skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
                skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
 
-               if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-                       if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
-                               skb->csum_level++;
-               } else {
-                       skb->ip_summed = CHECKSUM_UNNECESSARY;
-                       skb->csum_level = 0;
-               }
+               __skb_incr_checksum_unnecessary(skb);
 
                return 0;
        }
index 5c558dc1c6838681c2848412dced72a41fe764be..7209419cfb0e9c295a3feb5ecd3f9e1720ca16dc 100644 (file)
@@ -651,19 +651,19 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
        if (!w) {
                /* New dump:
                 *
-                * 1. hook callback destructor.
-                */
-               cb->args[3] = (long)cb->done;
-               cb->done = fib6_dump_done;
-
-               /*
-                * 2. allocate and initialize walker.
+                * 1. allocate and initialize walker.
                 */
                w = kzalloc(sizeof(*w), GFP_ATOMIC);
                if (!w)
                        return -ENOMEM;
                w->func = fib6_dump_node;
                cb->args[2] = (long)w;
+
+               /* 2. hook callback destructor.
+                */
+               cb->args[3] = (long)cb->done;
+               cb->done = fib6_dump_done;
+
        }
 
        arg.skb = skb;
index ca7e77e842835a6d153891fdca7dc8f196e0a2ba..c89aef524df9a2039d223fd2dd7566a9e1f7d3f4 100644 (file)
@@ -528,6 +528,9 @@ static int ip6erspan_rcv(struct sk_buff *skb,
        struct ip6_tnl *tunnel;
        u8 ver;
 
+       if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+               return PACKET_REJECT;
+
        ipv6h = ipv6_hdr(skb);
        ershdr = (struct erspan_base_hdr *)skb->data;
        ver = ershdr->ver;
index fd9f049d6d41e77eacc10ce074a8a0d96b0d2e11..636b360311c5365fba2330f6ca2f7f1b6dd1363e 100644 (file)
@@ -1125,6 +1125,8 @@ do_replace(struct net *net, sockptr_t arg, unsigned int len)
        void *loc_cpu_entry;
        struct ip6t_entry *iter;
 
+       if (len < sizeof(tmp))
+               return -EINVAL;
        if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
                return -EFAULT;
 
@@ -1501,6 +1503,8 @@ compat_do_replace(struct net *net, sockptr_t arg, unsigned int len)
        void *loc_cpu_entry;
        struct ip6t_entry *iter;
 
+       if (len < sizeof(tmp))
+               return -EINVAL;
        if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
                return -EFAULT;
 
index 7c1e6469d091d2252efa5b5ba7270586cef89c29..8b1dd7f512491d806e4d0a9fc5297a255dafd5a4 100644 (file)
@@ -447,7 +447,7 @@ csum_copy_err:
        goto try_again;
 }
 
-DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
+DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
 void udpv6_encap_enable(void)
 {
        static_branch_inc(&udpv6_encap_needed_key);
index 312bcaeea96fb78ac488124cf7795aa834392c64..bbd347de00b450bb3ecbbfa41c4dab9d36bb79d9 100644 (file)
@@ -174,13 +174,7 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
                skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
                skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
 
-               if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
-                       if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
-                               skb->csum_level++;
-               } else {
-                       skb->ip_summed = CHECKSUM_UNNECESSARY;
-                       skb->csum_level = 0;
-               }
+               __skb_incr_checksum_unnecessary(skb);
 
                return 0;
        }
index 3a1967bc7bad63d5a8a628b3f3b868e3a27baaca..7e74b812e366ae311f52615e9b304d6fe8b924b8 100644 (file)
@@ -3937,8 +3937,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
                                mptcp_set_state(newsk, TCP_CLOSE);
                }
        } else {
-               MPTCP_INC_STATS(sock_net(ssk),
-                               MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
 tcpfallback:
                newsk->sk_kern_sock = kern;
                lock_sock(newsk);
index dcd1c76d2a3ba1ccc31a3e9279f725cd6d433782..73fdf423de44eef5d2c3085515ad475bf63fb718 100644 (file)
@@ -1493,6 +1493,10 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
        struct mptcp_subflow_context *subflow;
        int space, cap;
 
+       /* bpf can land here with a wrong sk type */
+       if (sk->sk_protocol == IPPROTO_TCP)
+               return -EINVAL;
+
        if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
                cap = sk->sk_rcvbuf >> 1;
        else
index 1626dd20c68f1f0d67fdb8e0a2fbd2c2d1e70662..6042a47da61be8bc3000ab485fe6fbb7bff387b6 100644 (file)
@@ -905,6 +905,8 @@ dispose_child:
        return child;
 
 fallback:
+       if (fallback)
+               SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK);
        mptcp_subflow_drop_ctx(child);
        return child;
 }
index fd86f2720c9e776b08db64c6f1083a425c289754..d89d779467197a0846406e0b0ce6938e8a3d404d 100644 (file)
@@ -1209,10 +1209,11 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx)
                return true;
 
        list_for_each_entry(trans, &nft_net->commit_list, list) {
-               if ((trans->msg_type == NFT_MSG_NEWCHAIN ||
-                    trans->msg_type == NFT_MSG_DELCHAIN) &&
-                   trans->ctx.table == ctx->table &&
-                   nft_trans_chain_update(trans))
+               if (trans->ctx.table == ctx->table &&
+                   ((trans->msg_type == NFT_MSG_NEWCHAIN &&
+                     nft_trans_chain_update(trans)) ||
+                    (trans->msg_type == NFT_MSG_DELCHAIN &&
+                     nft_is_base_chain(trans->ctx.chain))))
                        return true;
        }
 
@@ -2449,6 +2450,9 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
                struct nft_stats __percpu *stats = NULL;
                struct nft_chain_hook hook = {};
 
+               if (table->flags & __NFT_TABLE_F_UPDATE)
+                       return -EINVAL;
+
                if (flags & NFT_CHAIN_BINDING)
                        return -EOPNOTSUPP;
 
@@ -8293,11 +8297,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
        return err;
 }
 
+/* call under rcu_read_lock */
 static const struct nf_flowtable_type *__nft_flowtable_type_get(u8 family)
 {
        const struct nf_flowtable_type *type;
 
-       list_for_each_entry(type, &nf_tables_flowtables, list) {
+       list_for_each_entry_rcu(type, &nf_tables_flowtables, list) {
                if (family == type->family)
                        return type;
        }
@@ -8309,9 +8314,13 @@ nft_flowtable_type_get(struct net *net, u8 family)
 {
        const struct nf_flowtable_type *type;
 
+       rcu_read_lock();
        type = __nft_flowtable_type_get(family);
-       if (type != NULL && try_module_get(type->owner))
+       if (type != NULL && try_module_get(type->owner)) {
+               rcu_read_unlock();
                return type;
+       }
+       rcu_read_unlock();
 
        lockdep_nfnl_nft_mutex_not_held();
 #ifdef CONFIG_MODULES
@@ -10455,10 +10464,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
        struct nft_trans *trans, *next;
        LIST_HEAD(set_update_list);
        struct nft_trans_elem *te;
+       int err = 0;
 
        if (action == NFNL_ABORT_VALIDATE &&
            nf_tables_validate(net) < 0)
-               return -EAGAIN;
+               err = -EAGAIN;
 
        list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list,
                                         list) {
@@ -10650,12 +10660,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
                nf_tables_abort_release(trans);
        }
 
-       if (action == NFNL_ABORT_AUTOLOAD)
-               nf_tables_module_autoload(net);
-       else
-               nf_tables_module_autoload_cleanup(net);
-
-       return 0;
+       return err;
 }
 
 static int nf_tables_abort(struct net *net, struct sk_buff *skb,
@@ -10668,6 +10673,17 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
        gc_seq = nft_gc_seq_begin(nft_net);
        ret = __nf_tables_abort(net, action);
        nft_gc_seq_end(nft_net, gc_seq);
+
+       WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+       /* module autoload needs to happen after GC sequence update because it
+        * temporarily releases and grabs mutex again.
+        */
+       if (action == NFNL_ABORT_AUTOLOAD)
+               nf_tables_module_autoload(net);
+       else
+               nf_tables_module_autoload_cleanup(net);
+
        mutex_unlock(&nft_net->commit_mutex);
 
        return ret;
@@ -11473,9 +11489,10 @@ static void __net_exit nf_tables_exit_net(struct net *net)
 
        gc_seq = nft_gc_seq_begin(nft_net);
 
-       if (!list_empty(&nft_net->commit_list) ||
-           !list_empty(&nft_net->module_list))
-               __nf_tables_abort(net, NFNL_ABORT_NONE);
+       WARN_ON_ONCE(!list_empty(&nft_net->commit_list));
+
+       if (!list_empty(&nft_net->module_list))
+               nf_tables_module_autoload_cleanup(net);
 
        __nft_release_tables(net);
 
@@ -11567,6 +11584,7 @@ static void __exit nf_tables_module_exit(void)
        unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
        nft_chain_filter_fini();
        nft_chain_route_fini();
+       nf_tables_trans_destroy_flush_work();
        unregister_pernet_subsys(&nf_tables_net_ops);
        cancel_work_sync(&trans_gc_work);
        cancel_work_sync(&trans_destroy_work);
index a4e3c5de998be4c756cb0dc423ee9a7e7fa3e1a9..00dbcd4d28e68097c612c628b7d4193124be4c9c 100644 (file)
@@ -302,7 +302,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
                }
                ret = PTR_ERR(trans_private);
                /* Trigger connection so that its ready for the next retry */
-               if (ret == -ENODEV)
+               if (ret == -ENODEV && cp)
                        rds_conn_connect_if_down(cp->cp_conn);
                goto out;
        }
index 39945b139c4817584fb9803b9e65c89fef68eca0..cd0accaf844a18e4a6a626adba5fae05df66b0a3 100644 (file)
@@ -241,13 +241,13 @@ static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a,
        struct tcf_skbmod *d = to_skbmod(a);
        unsigned char *b = skb_tail_pointer(skb);
        struct tcf_skbmod_params  *p;
-       struct tc_skbmod opt = {
-               .index   = d->tcf_index,
-               .refcnt  = refcount_read(&d->tcf_refcnt) - ref,
-               .bindcnt = atomic_read(&d->tcf_bindcnt) - bind,
-       };
+       struct tc_skbmod opt;
        struct tcf_t t;
 
+       memset(&opt, 0, sizeof(opt));
+       opt.index   = d->tcf_index;
+       opt.refcnt  = refcount_read(&d->tcf_refcnt) - ref,
+       opt.bindcnt = atomic_read(&d->tcf_bindcnt) - bind;
        spin_lock_bh(&d->tcf_lock);
        opt.action = d->tcf_action;
        p = rcu_dereference_protected(d->skbmod_p,
index 65e05b0c98e461953aa8d98020142f0abe3ad8a7..60239378d43fb7adfe3926f927f3883f09673c16 100644 (file)
@@ -809,7 +809,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
                notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
                                                       !qdisc_is_offloaded);
                /* TODO: perform the search on a per txq basis */
-               sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
+               sch = qdisc_lookup_rcu(qdisc_dev(sch), TC_H_MAJ(parentid));
                if (sch == NULL) {
                        WARN_ON_ONCE(parentid != TC_H_ROOT);
                        break;
index 1748268e0694f20d35dbc1a6f5190d5914c7f453..ee5d306a96d0f870880013d90a396f0b3988e8b2 100644 (file)
@@ -120,7 +120,6 @@ virtio_transport_send_pkt_work(struct work_struct *work)
                if (!skb)
                        break;
 
-               virtio_transport_deliver_tap_pkt(skb);
                reply = virtio_vsock_skb_reply(skb);
                sgs = vsock->out_sgs;
                sg_init_one(sgs[out_sg], virtio_vsock_hdr(skb),
@@ -170,6 +169,8 @@ virtio_transport_send_pkt_work(struct work_struct *work)
                        break;
                }
 
+               virtio_transport_deliver_tap_pkt(skb);
+
                if (reply) {
                        struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
                        int val;
index 967f1abb0edbd83dd26c9872ed9254992aadea6d..cb1be22afc65ffa9196b984c991133efffd0da3f 100755 (executable)
@@ -1541,7 +1541,7 @@ sub create_parameterlist($$$$) {
                     save_struct_actual($2);
 
                     push_parameter($2, "$type $1", $arg, $file, $declaration_name);
-                } elsif ($param =~ m/(.*?):(\d+)/) {
+                } elsif ($param =~ m/(.*?):(\w+)/) {
                     if ($type ne "") { # skip unnamed bit-fields
                         save_struct_actual($1);
                         push_parameter($1, "$type:$2", $arg, $file, $declaration_name)
index 7e118858b545c15bc4d0d03787e01d715423bafe..0a9a0ac3f2662447699ec355253737fe6a8d67f0 100644 (file)
@@ -1793,11 +1793,11 @@ int security_path_mknod(const struct path *dir, struct dentry *dentry,
 EXPORT_SYMBOL(security_path_mknod);
 
 /**
- * security_path_post_mknod() - Update inode security field after file creation
+ * security_path_post_mknod() - Update inode security after reg file creation
  * @idmap: idmap of the mount
  * @dentry: new file
  *
- * Update inode security field after a file has been created.
+ * Update inode security field after a regular file has been created.
  */
 void security_path_post_mknod(struct mnt_idmap *idmap, struct dentry *dentry)
 {
index 0619a1cbbfbe41a55dbb53021d769644978e0ada..074d6c2714eb557f6f49e5f4730b3dbc99480898 100644 (file)
@@ -2123,7 +2123,6 @@ static struct file_system_type sel_fs_type = {
        .kill_sb        = sel_kill_sb,
 };
 
-static struct vfsmount *selinuxfs_mount __ro_after_init;
 struct path selinux_null __ro_after_init;
 
 static int __init init_sel_fs(void)
@@ -2145,18 +2144,21 @@ static int __init init_sel_fs(void)
                return err;
        }
 
-       selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type);
-       if (IS_ERR(selinuxfs_mount)) {
+       selinux_null.mnt = kern_mount(&sel_fs_type);
+       if (IS_ERR(selinux_null.mnt)) {
                pr_err("selinuxfs:  could not mount!\n");
-               err = PTR_ERR(selinuxfs_mount);
-               selinuxfs_mount = NULL;
+               err = PTR_ERR(selinux_null.mnt);
+               selinux_null.mnt = NULL;
+               return err;
        }
+
        selinux_null.dentry = d_hash_and_lookup(selinux_null.mnt->mnt_root,
                                                &null_name);
        if (IS_ERR(selinux_null.dentry)) {
                pr_err("selinuxfs:  could not lookup null!\n");
                err = PTR_ERR(selinux_null.dentry);
                selinux_null.dentry = NULL;
+               return err;
        }
 
        return err;
index 0ba8f0c4cd99a27aaff1d3b43edd306daf0ad857..3a593da09280dca9e5b59dc96c6c2cb27cac6b6b 100644 (file)
@@ -725,7 +725,13 @@ static void __exit amiga_audio_remove(struct platform_device *pdev)
        dmasound_deinit();
 }
 
-static struct platform_driver amiga_audio_driver = {
+/*
+ * amiga_audio_remove() lives in .exit.text. For drivers registered via
+ * module_platform_driver_probe() this is ok because they cannot get unbound at
+ * runtime. So mark the driver struct with __refdata to prevent modpost
+ * triggering a section mismatch warning.
+ */
+static struct platform_driver amiga_audio_driver __refdata = {
        .remove_new = __exit_p(amiga_audio_remove),
        .driver = {
                .name   = "amiga-audio",
index d36234b88fb4219fec68e3f68103d01ee4c224ab..941bfbf812ed305bbfb368771d66134703ba8bea 100644 (file)
@@ -255,7 +255,7 @@ lookup_voices(struct snd_emux *emu, struct snd_emu10k1 *hw,
                /* check if sample is finished playing (non-looping only) */
                if (bp != best + V_OFF && bp != best + V_FREE &&
                    (vp->reg.sample_mode & SNDRV_SFNT_SAMPLE_SINGLESHOT)) {
-                       val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch) - 64;
+                       val = snd_emu10k1_ptr_read(hw, CCCA_CURRADDR, vp->ch);
                        if (val >= vp->reg.loopstart)
                                bp = best + V_OFF;
                }
@@ -362,7 +362,7 @@ start_voice(struct snd_emux_voice *vp)
 
        map = (hw->silent_page.addr << hw->address_mode) | (hw->address_mode ? MAP_PTI_MASK1 : MAP_PTI_MASK0);
 
-       addr = vp->reg.start + 64;
+       addr = vp->reg.start;
        temp = vp->reg.parm.filterQ;
        ccca = (temp << 28) | addr;
        if (vp->apitch < 0xe400)
@@ -430,9 +430,6 @@ start_voice(struct snd_emux_voice *vp)
                /* Q & current address (Q 4bit value, MSB) */
                CCCA, ccca,
 
-               /* cache */
-               CCR, REG_VAL_PUT(CCR_CACHEINVALIDSIZE, 64),
-
                /* reset volume */
                VTFT, vtarget | vp->ftarget,
                CVCF, vtarget | CVCF_CURRENTFILTER_MASK,
index 72ec872afb8d27de1d2b23288988bf4a5e4f4b88..8fb688e4141485cdf1d29ba73ed25ddc17a2a936 100644 (file)
@@ -108,7 +108,10 @@ static const struct cs35l41_config cs35l41_config_table[] = {
        { "10431F12", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 1000, 4500, 24 },
        { "10431F1F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, -1, 0, 0, 0, 0 },
        { "10431F62", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 0, 0, 0 },
+       { "10433A60", 2, INTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 1, 2, 0, 1000, 4500, 24 },
        { "17AA386F", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, -1, -1, 0, 0, 0 },
+       { "17AA3877", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
+       { "17AA3878", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
        { "17AA38A9", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
        { "17AA38AB", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 2, -1, 0, 0, 0 },
        { "17AA38B4", 2, EXTERNAL, { CS35L41_LEFT, CS35L41_RIGHT, 0, 0 }, 0, 1, -1, 0, 0, 0 },
@@ -496,7 +499,10 @@ static const struct cs35l41_prop_model cs35l41_prop_model_table[] = {
        { "CSC3551", "10431F12", generic_dsd_config },
        { "CSC3551", "10431F1F", generic_dsd_config },
        { "CSC3551", "10431F62", generic_dsd_config },
+       { "CSC3551", "10433A60", generic_dsd_config },
        { "CSC3551", "17AA386F", generic_dsd_config },
+       { "CSC3551", "17AA3877", generic_dsd_config },
+       { "CSC3551", "17AA3878", generic_dsd_config },
        { "CSC3551", "17AA38A9", generic_dsd_config },
        { "CSC3551", "17AA38AB", generic_dsd_config },
        { "CSC3551", "17AA38B4", generic_dsd_config },
index 13beee807308f1763145cc1f9c1590e427236dc6..40f2f97944d54c916d58279a94f84e6be69b5bba 100644 (file)
@@ -56,10 +56,19 @@ static const struct i2c_device_id cs35l56_hda_i2c_id[] = {
        {}
 };
 
+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+       { "CSC3554", 0 },
+       { "CSC3556", 0 },
+       { "CSC3557", 0 },
+       {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
 static struct i2c_driver cs35l56_hda_i2c_driver = {
        .driver = {
-               .name           = "cs35l56-hda",
-               .pm             = &cs35l56_hda_pm_ops,
+               .name             = "cs35l56-hda",
+               .acpi_match_table = cs35l56_acpi_hda_match,
+               .pm               = &cs35l56_hda_pm_ops,
        },
        .id_table       = cs35l56_hda_i2c_id,
        .probe          = cs35l56_hda_i2c_probe,
index a3b2fa76663d3685cf404e59785cdc00c502e493..7f02155fe61e3cd529e4c688bf6d734848baf45d 100644 (file)
@@ -56,10 +56,19 @@ static const struct spi_device_id cs35l56_hda_spi_id[] = {
        {}
 };
 
+static const struct acpi_device_id cs35l56_acpi_hda_match[] = {
+       { "CSC3554", 0 },
+       { "CSC3556", 0 },
+       { "CSC3557", 0 },
+       {}
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_acpi_hda_match);
+
 static struct spi_driver cs35l56_hda_spi_driver = {
        .driver = {
-               .name           = "cs35l56-hda",
-               .pm             = &cs35l56_hda_pm_ops,
+               .name             = "cs35l56-hda",
+               .acpi_match_table = cs35l56_acpi_hda_match,
+               .pm               = &cs35l56_hda_pm_ops,
        },
        .id_table       = cs35l56_hda_spi_id,
        .probe          = cs35l56_hda_spi_probe,
index a17c36a36aa5375fd8295911a2ffc707cb14263e..cdcb28aa9d7bf028d429aeea0016cec7c6bc0c22 100644 (file)
@@ -6875,11 +6875,38 @@ static void alc287_fixup_legion_16ithg6_speakers(struct hda_codec *cdc, const st
        comp_generic_fixup(cdc, action, "i2c", "CLSA0101", "-%s:00-cs35l41-hda.%d", 2);
 }
 
+static void cs35l56_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+       comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 2);
+}
+
+static void cs35l56_fixup_i2c_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+       comp_generic_fixup(cdc, action, "i2c", "CSC3556", "-%s:00-cs35l56-hda.%d", 4);
+}
+
+static void cs35l56_fixup_spi_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+       comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 2);
+}
+
 static void cs35l56_fixup_spi_four(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
 {
        comp_generic_fixup(cdc, action, "spi", "CSC3556", "-%s:00-cs35l56-hda.%d", 4);
 }
 
+static void alc285_fixup_asus_ga403u(struct hda_codec *cdc, const struct hda_fixup *fix, int action)
+{
+       /*
+        * The same SSID has been re-used in different hardware, they have
+        * different codecs and the newer GA403U has a ALC285.
+        */
+       if (cdc->core.vendor_id == 0x10ec0285)
+               cs35l56_fixup_i2c_two(cdc, fix, action);
+       else
+               alc_fixup_inv_dmic(cdc, fix, action);
+}
+
 static void tas2781_fixup_i2c(struct hda_codec *cdc,
        const struct hda_fixup *fix, int action)
 {
@@ -7436,6 +7463,10 @@ enum {
        ALC256_FIXUP_ACER_SFG16_MICMUTE_LED,
        ALC256_FIXUP_HEADPHONE_AMP_VOL,
        ALC245_FIXUP_HP_SPECTRE_X360_EU0XXX,
+       ALC285_FIXUP_CS35L56_SPI_2,
+       ALC285_FIXUP_CS35L56_I2C_2,
+       ALC285_FIXUP_CS35L56_I2C_4,
+       ALC285_FIXUP_ASUS_GA403U,
 };
 
 /* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -9643,6 +9674,22 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc245_fixup_hp_spectre_x360_eu0xxx,
        },
+       [ALC285_FIXUP_CS35L56_SPI_2] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cs35l56_fixup_spi_two,
+       },
+       [ALC285_FIXUP_CS35L56_I2C_2] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cs35l56_fixup_i2c_two,
+       },
+       [ALC285_FIXUP_CS35L56_I2C_4] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cs35l56_fixup_i2c_four,
+       },
+       [ALC285_FIXUP_ASUS_GA403U] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc285_fixup_asus_ga403u,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -10096,7 +10143,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1a83, "ASUS UM5302LA", ALC294_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1a8f, "ASUS UX582ZS", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1b11, "ASUS UX431DA", ALC294_FIXUP_ASUS_COEF_1B),
-       SND_PCI_QUIRK(0x1043, 0x1b13, "Asus U41SV", ALC269_FIXUP_INV_DMIC),
+       SND_PCI_QUIRK(0x1043, 0x1b13, "ASUS U41SV/GA403U", ALC285_FIXUP_ASUS_GA403U),
        SND_PCI_QUIRK(0x1043, 0x1b93, "ASUS G614JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1bbd, "ASUS Z550MA", ALC255_FIXUP_ASUS_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1043, 0x1c03, "ASUS UM3406HA", ALC287_FIXUP_CS35L41_I2C_2),
@@ -10104,6 +10151,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1c33, "ASUS UX5304MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1c43, "ASUS UX8406MA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1c62, "ASUS GU603", ALC289_FIXUP_ASUS_GA401),
+       SND_PCI_QUIRK(0x1043, 0x1c63, "ASUS GU605M", ALC285_FIXUP_CS35L56_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1c92, "ASUS ROG Strix G15", ALC285_FIXUP_ASUS_G533Z_PINS),
        SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JU/JV/JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
        SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JY/JZ/JI/JG", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
@@ -10115,11 +10163,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401),
        SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
        SND_PCI_QUIRK(0x1043, 0x1da2, "ASUS UP6502ZA/ZD", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1043, 0x1df3, "ASUS UM5606", ALC285_FIXUP_CS35L56_I2C_4),
        SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402ZA", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x1e11, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA502),
        SND_PCI_QUIRK(0x1043, 0x1e12, "ASUS UM3402", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS),
        SND_PCI_QUIRK(0x1043, 0x1e5e, "ASUS ROG Strix G513", ALC294_FIXUP_ASUS_G513_PINS),
+       SND_PCI_QUIRK(0x1043, 0x1e63, "ASUS H7606W", ALC285_FIXUP_CS35L56_I2C_2),
+       SND_PCI_QUIRK(0x1043, 0x1e83, "ASUS GA605W", ALC285_FIXUP_CS35L56_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401),
        SND_PCI_QUIRK(0x1043, 0x1ee2, "ASUS UM6702RA/RC", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x1043, 0x1c52, "ASUS Zephyrus G15 2022", ALC289_FIXUP_ASUS_GA401),
@@ -10133,7 +10184,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1043, 0x3a30, "ASUS G814JVR/JIR", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x3a40, "ASUS G814JZR", ALC245_FIXUP_CS35L41_SPI_2),
        SND_PCI_QUIRK(0x1043, 0x3a50, "ASUS G834JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
-       SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC245_FIXUP_CS35L41_SPI_2),
+       SND_PCI_QUIRK(0x1043, 0x3a60, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
        SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC),
        SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC),
        SND_PCI_QUIRK(0x1043, 0x8398, "ASUS P1005", ALC269_FIXUP_STEREO_DMIC),
@@ -10159,7 +10210,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x10ec, 0x1254, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
        SND_PCI_QUIRK(0x10ec, 0x12cc, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
        SND_PCI_QUIRK(0x10ec, 0x12f6, "Intel Reference board", ALC295_FIXUP_CHROME_BOOK),
-       SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_HEADSET_MODE),
+       SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-SZ6", ALC269_FIXUP_ASPIRE_HEADSET_MIC),
        SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC),
        SND_PCI_QUIRK(0x144d, 0xc169, "Samsung Notebook 9 Pen (NP930SBE-K01US)", ALC298_FIXUP_SAMSUNG_AMP),
        SND_PCI_QUIRK(0x144d, 0xc176, "Samsung Notebook 9 Pro (NP930MBE-K04US)", ALC298_FIXUP_SAMSUNG_AMP),
@@ -10333,6 +10384,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x17aa, 0x3869, "Lenovo Yoga7 14IAL7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN),
        SND_PCI_QUIRK(0x17aa, 0x386f, "Legion 7i 16IAX7", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x17aa, 0x3870, "Lenovo Yoga 7 14ARB7", ALC287_FIXUP_YOGA7_14ARB7_I2C),
+       SND_PCI_QUIRK(0x17aa, 0x3877, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),
+       SND_PCI_QUIRK(0x17aa, 0x3878, "Lenovo Legion 7 Slim 16ARHA7", ALC287_FIXUP_CS35L41_I2C_2),
        SND_PCI_QUIRK(0x17aa, 0x387d, "Yoga S780-16 pro Quad AAC", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x387e, "Yoga S780-16 pro Quad YC", ALC287_FIXUP_TAS2781_I2C),
        SND_PCI_QUIRK(0x17aa, 0x3881, "YB9 dual power mode2 YC", ALC287_FIXUP_TAS2781_I2C),
@@ -10403,6 +10456,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1d05, 0x1147, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
        SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
        SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
        SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
index 8c8b1dcac6281c1d7448a529ba316d5346fd9030..5f35b90eab8d3f1aa46e6d4ea6bdd8d6d49638a7 100644 (file)
@@ -115,7 +115,10 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
                goto unregister_dmic_dev;
        }
 
-       acp_init(chip);
+       ret = acp_init(chip);
+       if (ret)
+               goto unregister_dmic_dev;
+
        res = devm_kcalloc(&pci->dev, num_res, sizeof(struct resource), GFP_KERNEL);
        if (!res) {
                ret = -ENOMEM;
@@ -133,11 +136,9 @@ static int acp_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id
                }
        }
 
-       if (flag == FLAG_AMD_LEGACY_ONLY_DMIC) {
-               ret = check_acp_pdm(pci, chip);
-               if (ret < 0)
-                       goto skip_pdev_creation;
-       }
+       ret = check_acp_pdm(pci, chip);
+       if (ret < 0)
+               goto skip_pdev_creation;
 
        chip->flag = flag;
        memset(&pdevinfo, 0, sizeof(pdevinfo));
index 01ef4db5407da52cc04c6813ba07970c24d91541..287ac01a387357beffe2a8f76ab5f3ec15dc7b3d 100644 (file)
@@ -56,6 +56,11 @@ static int _cs_amp_write_cal_coeffs(struct cs_dsp *dsp,
        dev_dbg(dsp->dev, "Calibration: Ambient=%#x, Status=%#x, CalR=%d\n",
                data->calAmbient, data->calStatus, data->calR);
 
+       if (list_empty(&dsp->ctl_list)) {
+               dev_info(dsp->dev, "Calibration disabled due to missing firmware controls\n");
+               return -ENOENT;
+       }
+
        ret = cs_amp_write_cal_coeff(dsp, controls, controls->ambient, data->calAmbient);
        if (ret)
                return ret;
index 860d5cda67bffe83c5c4934497cdf8c1c510922b..94685449f0f48c9b7bd534b1947da07ab26fad53 100644 (file)
@@ -2364,7 +2364,8 @@ static int cs42l43_codec_runtime_resume(struct device *dev)
 
 static int cs42l43_codec_suspend(struct device *dev)
 {
-       struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+       struct cs42l43_codec *priv = dev_get_drvdata(dev);
+       struct cs42l43 *cs42l43 = priv->core;
 
        disable_irq(cs42l43->irq);
 
@@ -2373,7 +2374,8 @@ static int cs42l43_codec_suspend(struct device *dev)
 
 static int cs42l43_codec_suspend_noirq(struct device *dev)
 {
-       struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+       struct cs42l43_codec *priv = dev_get_drvdata(dev);
+       struct cs42l43 *cs42l43 = priv->core;
 
        enable_irq(cs42l43->irq);
 
@@ -2382,7 +2384,8 @@ static int cs42l43_codec_suspend_noirq(struct device *dev)
 
 static int cs42l43_codec_resume(struct device *dev)
 {
-       struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+       struct cs42l43_codec *priv = dev_get_drvdata(dev);
+       struct cs42l43 *cs42l43 = priv->core;
 
        enable_irq(cs42l43->irq);
 
@@ -2391,7 +2394,8 @@ static int cs42l43_codec_resume(struct device *dev)
 
 static int cs42l43_codec_resume_noirq(struct device *dev)
 {
-       struct cs42l43 *cs42l43 = dev_get_drvdata(dev);
+       struct cs42l43_codec *priv = dev_get_drvdata(dev);
+       struct cs42l43 *cs42l43 = priv->core;
 
        disable_irq(cs42l43->irq);
 
index 15289dadafea091d2693149e600d72e0cbb975c0..17bd6b5160772e01d8597767868a9d2472cae276 100644 (file)
@@ -412,9 +412,9 @@ static const struct _coeff_div coeff_div_v3[] = {
        {125, 48000, 6000000, 0x04, 0x04, 0x1F, 0x2D, 0x8A, 0x0A, 0x27, 0x27},
 
        {128, 8000, 1024000, 0x60, 0x00, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
-       {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
-       {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
-       {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+       {128, 16000, 2048000, 0x20, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+       {128, 44100, 5644800, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+       {128, 48000, 6144000, 0xE0, 0x00, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
        {144, 8000, 1152000, 0x20, 0x00, 0x03, 0x35, 0x8A, 0x1B, 0x23, 0x47},
        {144, 16000, 2304000, 0x20, 0x00, 0x11, 0x35, 0x8A, 0x1B, 0x23, 0x47},
        {192, 8000, 1536000, 0x60, 0x02, 0x0D, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
@@ -423,10 +423,10 @@ static const struct _coeff_div coeff_div_v3[] = {
 
        {200, 48000, 9600000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
        {250, 48000, 12000000, 0x04, 0x04, 0x0F, 0x2D, 0xCA, 0x0A, 0x27, 0x27},
-       {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
-       {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
-       {256, 44100, 11289600, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
-       {256, 48000, 12288000, 0xE0, 0x00, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+       {256, 8000, 2048000, 0x60, 0x00, 0x31, 0x35, 0x08, 0x19, 0x1F, 0x7F},
+       {256, 16000, 4096000, 0x20, 0x00, 0x01, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+       {256, 44100, 11289600, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+       {256, 48000, 12288000, 0xE0, 0x01, 0x01, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
        {288, 8000, 2304000, 0x20, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x23, 0x47},
        {384, 8000, 3072000, 0x60, 0x02, 0x05, 0x75, 0x8A, 0x1B, 0x1F, 0x7F},
        {384, 16000, 6144000, 0x20, 0x02, 0x03, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
@@ -435,10 +435,10 @@ static const struct _coeff_div coeff_div_v3[] = {
 
        {400, 48000, 19200000, 0xE4, 0x04, 0x35, 0x6d, 0xCA, 0x0A, 0x1F, 0x1F},
        {500, 48000, 24000000, 0xF8, 0x04, 0x3F, 0x6D, 0xCA, 0x0A, 0x1F, 0x1F},
-       {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
-       {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
-       {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
-       {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
+       {512, 8000, 4096000, 0x60, 0x00, 0x01, 0x08, 0x19, 0x1B, 0x1F, 0x7F},
+       {512, 16000, 8192000, 0x20, 0x00, 0x30, 0x35, 0x08, 0x19, 0x1F, 0x3F},
+       {512, 44100, 22579200, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
+       {512, 48000, 24576000, 0xE0, 0x00, 0x00, 0x2D, 0x48, 0x08, 0x1F, 0x1F},
        {768, 8000, 6144000, 0x60, 0x02, 0x11, 0x35, 0x8A, 0x1B, 0x1F, 0x7F},
        {768, 16000, 12288000, 0x20, 0x02, 0x01, 0x35, 0x8A, 0x1B, 0x1F, 0x3F},
        {768, 32000, 24576000, 0xE0, 0x02, 0x30, 0x2D, 0xCA, 0x0A, 0x1F, 0x1F},
@@ -835,7 +835,6 @@ static void es8326_jack_detect_handler(struct work_struct *work)
                        dev_dbg(comp->dev, "Report hp remove event\n");
                        snd_soc_jack_report(es8326->jack, 0, SND_JACK_HEADSET);
                        /* mute adc when mic path switch */
-                       regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33);
                        regmap_write(es8326->regmap, ES8326_ADC1_SRC, 0x44);
                        regmap_write(es8326->regmap, ES8326_ADC2_SRC, 0x66);
                        es8326->hp = 0;
@@ -843,6 +842,7 @@ static void es8326_jack_detect_handler(struct work_struct *work)
                regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
                regmap_write(es8326->regmap, ES8326_SYS_BIAS, 0x0a);
                regmap_update_bits(es8326->regmap, ES8326_HP_DRIVER_REF, 0x0f, 0x03);
+               regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9);
                /*
                 * Inverted HPJACK_POL bit to trigger one IRQ to double check HP Removal event
                 */
@@ -865,6 +865,8 @@ static void es8326_jack_detect_handler(struct work_struct *work)
                         * set auto-check mode, then restart jack_detect_work after 400ms.
                         * Don't report jack status.
                         */
+                       regmap_write(es8326->regmap, ES8326_INT_SOURCE,
+                                       (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
                        regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x01);
                        es8326_enable_micbias(es8326->component);
                        usleep_range(50000, 70000);
@@ -891,7 +893,6 @@ static void es8326_jack_detect_handler(struct work_struct *work)
                        snd_soc_jack_report(es8326->jack,
                                        SND_JACK_HEADSET, SND_JACK_HEADSET);
 
-                       regmap_write(es8326->regmap, ES8326_ADC_SCALE, 0x33);
                        regmap_update_bits(es8326->regmap, ES8326_PGA_PDN,
                                        0x08, 0x08);
                        regmap_update_bits(es8326->regmap, ES8326_PGAGAIN,
@@ -987,7 +988,7 @@ static int es8326_resume(struct snd_soc_component *component)
        regmap_write(es8326->regmap, ES8326_VMIDSEL, 0x0E);
        regmap_write(es8326->regmap, ES8326_ANA_LP, 0xf0);
        usleep_range(10000, 15000);
-       regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xe9);
+       regmap_write(es8326->regmap, ES8326_HPJACK_TIMER, 0xd9);
        regmap_write(es8326->regmap, ES8326_ANA_MICBIAS, 0xcb);
        /* set headphone default type and detect pin */
        regmap_write(es8326->regmap, ES8326_HPDET_TYPE, 0x83);
@@ -1038,8 +1039,7 @@ static int es8326_resume(struct snd_soc_component *component)
        es8326_enable_micbias(es8326->component);
        usleep_range(50000, 70000);
        regmap_update_bits(es8326->regmap, ES8326_HPDET_TYPE, 0x03, 0x00);
-       regmap_write(es8326->regmap, ES8326_INT_SOURCE,
-                   (ES8326_INT_SRC_PIN9 | ES8326_INT_SRC_BUTTON));
+       regmap_write(es8326->regmap, ES8326_INT_SOURCE, ES8326_INT_SRC_PIN9);
        regmap_write(es8326->regmap, ES8326_INTOUT_IO,
                     es8326->interrupt_clk);
        regmap_write(es8326->regmap, ES8326_SDINOUT1_IO,
@@ -1060,6 +1060,8 @@ static int es8326_resume(struct snd_soc_component *component)
        es8326->hp = 0;
        es8326->hpl_vol = 0x03;
        es8326->hpr_vol = 0x03;
+
+       es8326_irq(es8326->irq, es8326);
        return 0;
 }
 
@@ -1070,6 +1072,9 @@ static int es8326_suspend(struct snd_soc_component *component)
        cancel_delayed_work_sync(&es8326->jack_detect_work);
        es8326_disable_micbias(component);
        es8326->calibrated = false;
+       regmap_write(es8326->regmap, ES8326_CLK_MUX, 0x2d);
+       regmap_write(es8326->regmap, ES8326_DAC2HPMIX, 0x00);
+       regmap_write(es8326->regmap, ES8326_ANA_PDN, 0x3b);
        regmap_write(es8326->regmap, ES8326_CLK_CTL, ES8326_CLK_OFF);
        regcache_cache_only(es8326->regmap, true);
        regcache_mark_dirty(es8326->regmap);
index ee12caef810532380cdf1b5d6b0b204afef78e63..c3e52e7bdef57de0377cb7b467bf6fd8fd62b8c9 100644 (file)
 #define ES8326_MUTE (3 << 0)
 
 /* ES8326_CLK_CTL */
-#define ES8326_CLK_ON (0x7e << 0)
+#define ES8326_CLK_ON (0x7f << 0)
 #define ES8326_CLK_OFF (0 << 0)
 
 /* ES8326_CLK_INV */
index 47511f70119ae3b1d810ce8561d6026ccbbd98da..0b3bf920bcab2307c0107387e0ad728552bb6b9c 100644 (file)
@@ -537,7 +537,7 @@ static int rt1316_sdw_hw_params(struct snd_pcm_substream *substream,
        retval = sdw_stream_add_slave(rt1316->sdw_slave, &stream_config,
                                &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
@@ -577,12 +577,12 @@ static int rt1316_sdw_parse_dt(struct rt1316_sdw_priv *rt1316, struct device *de
        if (rt1316->bq_params_cnt) {
                rt1316->bq_params = devm_kzalloc(dev, rt1316->bq_params_cnt, GFP_KERNEL);
                if (!rt1316->bq_params) {
-                       dev_err(dev, "Could not allocate bq_params memory\n");
+                       dev_err(dev, "%s: Could not allocate bq_params memory\n", __func__);
                        ret = -ENOMEM;
                } else {
                        ret = device_property_read_u8_array(dev, "realtek,bq-params", rt1316->bq_params, rt1316->bq_params_cnt);
                        if (ret < 0)
-                               dev_err(dev, "Could not read list of realtek,bq-params\n");
+                               dev_err(dev, "%s: Could not read list of realtek,bq-params\n", __func__);
                }
        }
 
@@ -759,7 +759,7 @@ static int __maybe_unused rt1316_dev_resume(struct device *dev)
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT1316_PROBE_TIMEOUT));
        if (!time) {
-               dev_err(&slave->dev, "Initialization not complete, timed out\n");
+               dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
                sdw_show_ping_status(slave->bus, true);
 
                return -ETIMEDOUT;
index ff364bde4a084943d78da479dc876f7b328eb02b..462c9a4b1be5ddb27c078b4c49e9c2ee3737e467 100644 (file)
@@ -606,7 +606,7 @@ static int rt1318_sdw_hw_params(struct snd_pcm_substream *substream,
        retval = sdw_stream_add_slave(rt1318->sdw_slave, &stream_config,
                                &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
@@ -631,8 +631,8 @@ static int rt1318_sdw_hw_params(struct snd_pcm_substream *substream,
                sampling_rate = RT1318_SDCA_RATE_192000HZ;
                break;
        default:
-               dev_err(component->dev, "Rate %d is not supported\n",
-                       params_rate(params));
+               dev_err(component->dev, "%s: Rate %d is not supported\n",
+                       __func__, params_rate(params));
                return -EINVAL;
        }
 
@@ -835,7 +835,7 @@ static int __maybe_unused rt1318_dev_resume(struct device *dev)
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT1318_PROBE_TIMEOUT));
        if (!time) {
-               dev_err(&slave->dev, "Initialization not complete, timed out\n");
+               dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
                return -ETIMEDOUT;
        }
 
index e67c2e19cb1a7291170ada3b69cbdda4aadb8b6c..f9ee42c13dbac34afd0f79ff5299050106d82357 100644 (file)
@@ -132,7 +132,7 @@ static int rt5682_sdw_hw_params(struct snd_pcm_substream *substream,
        retval = sdw_stream_add_slave(rt5682->slave, &stream_config,
                                      &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
@@ -315,8 +315,8 @@ static int rt5682_sdw_init(struct device *dev, struct regmap *regmap,
                                          &rt5682_sdw_indirect_regmap);
        if (IS_ERR(rt5682->regmap)) {
                ret = PTR_ERR(rt5682->regmap);
-               dev_err(dev, "Failed to allocate register map: %d\n",
-                       ret);
+               dev_err(dev, "%s: Failed to allocate register map: %d\n",
+                       __func__, ret);
                return ret;
        }
 
@@ -400,7 +400,7 @@ static int rt5682_io_init(struct device *dev, struct sdw_slave *slave)
        }
 
        if (val != DEVICE_ID) {
-               dev_err(dev, "Device with ID register %x is not rt5682\n", val);
+               dev_err(dev, "%s: Device with ID register %x is not rt5682\n", __func__, val);
                ret = -ENODEV;
                goto err_nodev;
        }
@@ -648,7 +648,7 @@ static int rt5682_bus_config(struct sdw_slave *slave,
 
        ret = rt5682_clock_config(&slave->dev);
        if (ret < 0)
-               dev_err(&slave->dev, "Invalid clk config");
+               dev_err(&slave->dev, "%s: Invalid clk config", __func__);
 
        return ret;
 }
@@ -763,19 +763,19 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev)
                return 0;
 
        if (!slave->unattach_request) {
+               mutex_lock(&rt5682->disable_irq_lock);
                if (rt5682->disable_irq == true) {
-                       mutex_lock(&rt5682->disable_irq_lock);
                        sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
                        rt5682->disable_irq = false;
-                       mutex_unlock(&rt5682->disable_irq_lock);
                }
+               mutex_unlock(&rt5682->disable_irq_lock);
                goto regmap_sync;
        }
 
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT5682_PROBE_TIMEOUT));
        if (!time) {
-               dev_err(&slave->dev, "Initialization not complete, timed out\n");
+               dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
                sdw_show_ping_status(slave->bus, true);
 
                return -ETIMEDOUT;
index 0ebf344a1b6094a38a4c38c6817b8cf0c9242f48..434b926f96c8376c1c5b8b73c37b01b2d64641fe 100644 (file)
@@ -37,8 +37,8 @@ static int rt700_index_write(struct regmap *regmap,
 
        ret = regmap_write(regmap, addr, value);
        if (ret < 0)
-               pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
-                       addr, value, ret);
+               pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+                      __func__, addr, value, ret);
 
        return ret;
 }
@@ -52,8 +52,8 @@ static int rt700_index_read(struct regmap *regmap,
        *value = 0;
        ret = regmap_read(regmap, addr, value);
        if (ret < 0)
-               pr_err("Failed to get private value: %06x => %04x ret=%d\n",
-                       addr, *value, ret);
+               pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+                      __func__, addr, *value, ret);
 
        return ret;
 }
@@ -930,14 +930,14 @@ static int rt700_pcm_hw_params(struct snd_pcm_substream *substream,
                port_config.num += 2;
                break;
        default:
-               dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+               dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
                return -EINVAL;
        }
 
        retval = sdw_stream_add_slave(rt700->slave, &stream_config,
                                        &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
@@ -945,8 +945,8 @@ static int rt700_pcm_hw_params(struct snd_pcm_substream *substream,
                /* bit 3:0 Number of Channel */
                val |= (params_channels(params) - 1);
        } else {
-               dev_err(component->dev, "Unsupported channels %d\n",
-                       params_channels(params));
+               dev_err(component->dev, "%s: Unsupported channels %d\n",
+                       __func__, params_channels(params));
                return -EINVAL;
        }
 
index 935e597022d3242187b378107e302a36bedd17f5..2636c2eea4bc8be6af732d3c2d5f03b8d78be22f 100644 (file)
@@ -438,20 +438,20 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev)
                return 0;
 
        if (!slave->unattach_request) {
+               mutex_lock(&rt711->disable_irq_lock);
                if (rt711->disable_irq == true) {
-                       mutex_lock(&rt711->disable_irq_lock);
                        sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
                        sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
                        rt711->disable_irq = false;
-                       mutex_unlock(&rt711->disable_irq_lock);
                }
+               mutex_unlock(&rt711->disable_irq_lock);
                goto regmap_sync;
        }
 
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT711_PROBE_TIMEOUT));
        if (!time) {
-               dev_err(&slave->dev, "Initialization not complete, timed out\n");
+               dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
                sdw_show_ping_status(slave->bus, true);
 
                return -ETIMEDOUT;
index 447154cb60104d31bb66ef268d7b60c513e273e3..1e8dbfc3ecd969be3a87cb5f00aed853a56e2a41 100644 (file)
@@ -36,8 +36,8 @@ static int rt711_sdca_index_write(struct rt711_sdca_priv *rt711,
        ret = regmap_write(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt711->slave->dev,
-                       "Failed to set private value: %06x <= %04x ret=%d\n",
-                       addr, value, ret);
+                       "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+                       __func__, addr, value, ret);
 
        return ret;
 }
@@ -52,8 +52,8 @@ static int rt711_sdca_index_read(struct rt711_sdca_priv *rt711,
        ret = regmap_read(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt711->slave->dev,
-                       "Failed to get private value: %06x => %04x ret=%d\n",
-                       addr, *value, ret);
+                       "%s: Failed to get private value: %06x => %04x ret=%d\n",
+                       __func__, addr, *value, ret);
 
        return ret;
 }
@@ -1293,13 +1293,13 @@ static int rt711_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
        retval = sdw_stream_add_slave(rt711->slave, &stream_config,
                                        &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
        if (params_channels(params) > 16) {
-               dev_err(component->dev, "Unsupported channels %d\n",
-                       params_channels(params));
+               dev_err(component->dev, "%s: Unsupported channels %d\n",
+                       __func__, params_channels(params));
                return -EINVAL;
        }
 
@@ -1318,8 +1318,8 @@ static int rt711_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
                sampling_rate = RT711_SDCA_RATE_192000HZ;
                break;
        default:
-               dev_err(component->dev, "Rate %d is not supported\n",
-                       params_rate(params));
+               dev_err(component->dev, "%s: Rate %d is not supported\n",
+                       __func__, params_rate(params));
                return -EINVAL;
        }
 
index 3f5773310ae8cc3b5d94f76aa481724ac35bad0a..0d3b43dd22e63d2343b0ce882166ca7bedc7b67c 100644 (file)
@@ -408,7 +408,7 @@ static int rt711_bus_config(struct sdw_slave *slave,
 
        ret = rt711_clock_config(&slave->dev);
        if (ret < 0)
-               dev_err(&slave->dev, "Invalid clk config");
+               dev_err(&slave->dev, "%s: Invalid clk config", __func__);
 
        return ret;
 }
@@ -536,19 +536,19 @@ static int __maybe_unused rt711_dev_resume(struct device *dev)
                return 0;
 
        if (!slave->unattach_request) {
+               mutex_lock(&rt711->disable_irq_lock);
                if (rt711->disable_irq == true) {
-                       mutex_lock(&rt711->disable_irq_lock);
                        sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
                        rt711->disable_irq = false;
-                       mutex_unlock(&rt711->disable_irq_lock);
                }
+               mutex_unlock(&rt711->disable_irq_lock);
                goto regmap_sync;
        }
 
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT711_PROBE_TIMEOUT));
        if (!time) {
-               dev_err(&slave->dev, "Initialization not complete, timed out\n");
+               dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
                return -ETIMEDOUT;
        }
 
index 66eaed13b0d6a06ff1a649be6924e16850e65997..5446f9506a16722e8a43571631d109fa27c9fe65 100644 (file)
@@ -37,8 +37,8 @@ static int rt711_index_write(struct regmap *regmap,
 
        ret = regmap_write(regmap, addr, value);
        if (ret < 0)
-               pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
-                       addr, value, ret);
+               pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+                      __func__, addr, value, ret);
 
        return ret;
 }
@@ -52,8 +52,8 @@ static int rt711_index_read(struct regmap *regmap,
        *value = 0;
        ret = regmap_read(regmap, addr, value);
        if (ret < 0)
-               pr_err("Failed to get private value: %06x => %04x ret=%d\n",
-                       addr, *value, ret);
+               pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+                      __func__, addr, *value, ret);
 
        return ret;
 }
@@ -428,7 +428,7 @@ static void rt711_jack_init(struct rt711_priv *rt711)
                                RT711_HP_JD_FINAL_RESULT_CTL_JD12);
                        break;
                default:
-                       dev_warn(rt711->component->dev, "Wrong JD source\n");
+                       dev_warn(rt711->component->dev, "%s: Wrong JD source\n", __func__);
                        break;
                }
 
@@ -1020,7 +1020,7 @@ static int rt711_pcm_hw_params(struct snd_pcm_substream *substream,
        retval = sdw_stream_add_slave(rt711->slave, &stream_config,
                                        &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
@@ -1028,8 +1028,8 @@ static int rt711_pcm_hw_params(struct snd_pcm_substream *substream,
                /* bit 3:0 Number of Channel */
                val |= (params_channels(params) - 1);
        } else {
-               dev_err(component->dev, "Unsupported channels %d\n",
-                       params_channels(params));
+               dev_err(component->dev, "%s: Unsupported channels %d\n",
+                       __func__, params_channels(params));
                return -EINVAL;
        }
 
index 0926b26619bd45b69f5b0b4b5508d677ca207766..012b79e72cf6b64e1e5c2837aaf034237cddcaa6 100644 (file)
@@ -139,8 +139,8 @@ static int rt712_sdca_dmic_index_write(struct rt712_sdca_dmic_priv *rt712,
        ret = regmap_write(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt712->slave->dev,
-                       "Failed to set private value: %06x <= %04x ret=%d\n",
-                       addr, value, ret);
+                       "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+                       __func__, addr, value, ret);
 
        return ret;
 }
@@ -155,8 +155,8 @@ static int rt712_sdca_dmic_index_read(struct rt712_sdca_dmic_priv *rt712,
        ret = regmap_read(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt712->slave->dev,
-                       "Failed to get private value: %06x => %04x ret=%d\n",
-                       addr, *value, ret);
+                       "%s: Failed to get private value: %06x => %04x ret=%d\n",
+                       __func__, addr, *value, ret);
 
        return ret;
 }
@@ -317,7 +317,8 @@ static int rt712_sdca_dmic_set_gain_put(struct snd_kcontrol *kcontrol,
        for (i = 0; i < p->count; i++) {
                err = regmap_write(rt712->mbq_regmap, p->reg_base + i, gain_val[i]);
                if (err < 0)
-                       dev_err(&rt712->slave->dev, "0x%08x can't be set\n", p->reg_base + i);
+                       dev_err(&rt712->slave->dev, "%s: 0x%08x can't be set\n",
+                               __func__, p->reg_base + i);
        }
 
        return changed;
@@ -667,13 +668,13 @@ static int rt712_sdca_dmic_hw_params(struct snd_pcm_substream *substream,
        retval = sdw_stream_add_slave(rt712->slave, &stream_config,
                                        &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
        if (params_channels(params) > 4) {
-               dev_err(component->dev, "Unsupported channels %d\n",
-                       params_channels(params));
+               dev_err(component->dev, "%s: Unsupported channels %d\n",
+                       __func__, params_channels(params));
                return -EINVAL;
        }
 
@@ -698,8 +699,8 @@ static int rt712_sdca_dmic_hw_params(struct snd_pcm_substream *substream,
                sampling_rate = RT712_SDCA_RATE_192000HZ;
                break;
        default:
-               dev_err(component->dev, "Rate %d is not supported\n",
-                       params_rate(params));
+               dev_err(component->dev, "%s: Rate %d is not supported\n",
+                       __func__, params_rate(params));
                return -EINVAL;
        }
 
@@ -923,7 +924,8 @@ static int __maybe_unused rt712_sdca_dmic_dev_resume(struct device *dev)
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT712_PROBE_TIMEOUT));
        if (!time) {
-               dev_err(&slave->dev, "Initialization not complete, timed out\n");
+               dev_err(&slave->dev, "%s: Initialization not complete, timed out\n",
+                       __func__);
                sdw_show_ping_status(slave->bus, true);
 
                return -ETIMEDOUT;
index 01ac555cd79b84a0b1aabe57899b1fedc214a2b5..4e9ab3ef135b34946d37d6280a9afb568cceef51 100644 (file)
@@ -438,20 +438,21 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev)
                return 0;
 
        if (!slave->unattach_request) {
+               mutex_lock(&rt712->disable_irq_lock);
                if (rt712->disable_irq == true) {
-                       mutex_lock(&rt712->disable_irq_lock);
+
                        sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
                        sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
                        rt712->disable_irq = false;
-                       mutex_unlock(&rt712->disable_irq_lock);
                }
+               mutex_unlock(&rt712->disable_irq_lock);
                goto regmap_sync;
        }
 
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                msecs_to_jiffies(RT712_PROBE_TIMEOUT));
        if (!time) {
-               dev_err(&slave->dev, "Initialization not complete, timed out\n");
+               dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
                sdw_show_ping_status(slave->bus, true);
 
                return -ETIMEDOUT;
index 6954fbe7ec5f3bb79f8693c23f302a7a1003e11e..b503de9fda80e71cbe78e8916a6a7f41286ac5b2 100644 (file)
@@ -34,8 +34,8 @@ static int rt712_sdca_index_write(struct rt712_sdca_priv *rt712,
        ret = regmap_write(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt712->slave->dev,
-                       "Failed to set private value: %06x <= %04x ret=%d\n",
-                       addr, value, ret);
+                       "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+                       __func__, addr, value, ret);
 
        return ret;
 }
@@ -50,8 +50,8 @@ static int rt712_sdca_index_read(struct rt712_sdca_priv *rt712,
        ret = regmap_read(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt712->slave->dev,
-                       "Failed to get private value: %06x => %04x ret=%d\n",
-                       addr, *value, ret);
+                       "%s: Failed to get private value: %06x => %04x ret=%d\n",
+                       __func__, addr, *value, ret);
 
        return ret;
 }
@@ -1060,13 +1060,13 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
        retval = sdw_stream_add_slave(rt712->slave, &stream_config,
                                        &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
        if (params_channels(params) > 16) {
-               dev_err(component->dev, "Unsupported channels %d\n",
-                       params_channels(params));
+               dev_err(component->dev, "%s: Unsupported channels %d\n",
+                       __func__, params_channels(params));
                return -EINVAL;
        }
 
@@ -1085,8 +1085,8 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
                sampling_rate = RT712_SDCA_RATE_192000HZ;
                break;
        default:
-               dev_err(component->dev, "Rate %d is not supported\n",
-                       params_rate(params));
+               dev_err(component->dev, "%s: Rate %d is not supported\n",
+                       __func__, params_rate(params));
                return -EINVAL;
        }
 
@@ -1106,7 +1106,7 @@ static int rt712_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
                        sampling_rate);
                break;
        default:
-               dev_err(component->dev, "Wrong DAI id\n");
+               dev_err(component->dev, "%s: Wrong DAI id\n", __func__);
                return -EINVAL;
        }
 
index ab54a67a27ebbfc8fbe19837fa07ed7d084bd429..ee450126106f969588ab52b83434309f8cfb8036 100644 (file)
@@ -237,7 +237,7 @@ static int __maybe_unused rt715_dev_resume(struct device *dev)
        time = wait_for_completion_timeout(&slave->enumeration_complete,
                                           msecs_to_jiffies(RT715_PROBE_TIMEOUT));
        if (!time) {
-               dev_err(&slave->dev, "Enumeration not complete, timed out\n");
+               dev_err(&slave->dev, "%s: Enumeration not complete, timed out\n", __func__);
                sdw_show_ping_status(slave->bus, true);
 
                return -ETIMEDOUT;
index 4533eedd7e189f3b48e36175eb5494f20a6f1be0..3fb7b9adb61de628705d784fbe64e259bf031089 100644 (file)
@@ -41,8 +41,8 @@ static int rt715_sdca_index_write(struct rt715_sdca_priv *rt715,
        ret = regmap_write(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt715->slave->dev,
-                       "Failed to set private value: %08x <= %04x %d\n",
-                       addr, value, ret);
+                       "%s: Failed to set private value: %08x <= %04x %d\n",
+                       __func__, addr, value, ret);
 
        return ret;
 }
@@ -59,8 +59,8 @@ static int rt715_sdca_index_read(struct rt715_sdca_priv *rt715,
        ret = regmap_read(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt715->slave->dev,
-                               "Failed to get private value: %06x => %04x ret=%d\n",
-                               addr, *value, ret);
+                       "%s: Failed to get private value: %06x => %04x ret=%d\n",
+                       __func__, addr, *value, ret);
 
        return ret;
 }
@@ -152,8 +152,8 @@ static int rt715_sdca_set_amp_gain_put(struct snd_kcontrol *kcontrol,
                                mc->shift);
                ret = regmap_write(rt715->mbq_regmap, mc->reg + i, gain_val);
                if (ret != 0) {
-                       dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
-                               mc->reg + i, gain_val);
+                       dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+                               __func__, mc->reg + i, gain_val);
                        return ret;
                }
        }
@@ -188,8 +188,8 @@ static int rt715_sdca_set_amp_gain_4ch_put(struct snd_kcontrol *kcontrol,
                ret = regmap_write(rt715->mbq_regmap, reg_base + i,
                                gain_val);
                if (ret != 0) {
-                       dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
-                               reg_base + i, gain_val);
+                       dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+                               __func__, reg_base + i, gain_val);
                        return ret;
                }
        }
@@ -224,8 +224,8 @@ static int rt715_sdca_set_amp_gain_8ch_put(struct snd_kcontrol *kcontrol,
                reg = i < 7 ? reg_base + i : (reg_base - 1) | BIT(15);
                ret = regmap_write(rt715->mbq_regmap, reg, gain_val);
                if (ret != 0) {
-                       dev_err(component->dev, "Failed to write 0x%x=0x%x\n",
-                               reg, gain_val);
+                       dev_err(component->dev, "%s: Failed to write 0x%x=0x%x\n",
+                               __func__, reg, gain_val);
                        return ret;
                }
        }
@@ -246,8 +246,8 @@ static int rt715_sdca_set_amp_gain_get(struct snd_kcontrol *kcontrol,
        for (i = 0; i < 2; i++) {
                ret = regmap_read(rt715->mbq_regmap, mc->reg + i, &val);
                if (ret < 0) {
-                       dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
-                               mc->reg + i, ret);
+                       dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+                               __func__, mc->reg + i, ret);
                        return ret;
                }
                ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, mc->shift);
@@ -271,8 +271,8 @@ static int rt715_sdca_set_amp_gain_4ch_get(struct snd_kcontrol *kcontrol,
        for (i = 0; i < 4; i++) {
                ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val);
                if (ret < 0) {
-                       dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
-                               reg_base + i, ret);
+                       dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+                               __func__, reg_base + i, ret);
                        return ret;
                }
                ucontrol->value.integer.value[i] = rt715_sdca_get_gain(val, gain_sft);
@@ -297,8 +297,8 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol,
        for (i = 0; i < 8; i += 2) {
                ret = regmap_read(rt715->mbq_regmap, reg_base + i, &val_l);
                if (ret < 0) {
-                       dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
-                                       reg_base + i, ret);
+                       dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+                               __func__, reg_base + i, ret);
                        return ret;
                }
                ucontrol->value.integer.value[i] = (val_l >> gain_sft) / 10;
@@ -306,8 +306,8 @@ static int rt715_sdca_set_amp_gain_8ch_get(struct snd_kcontrol *kcontrol,
                reg = (i == 6) ? (reg_base - 1) | BIT(15) : reg_base + 1 + i;
                ret = regmap_read(rt715->mbq_regmap, reg, &val_r);
                if (ret < 0) {
-                       dev_err(component->dev, "Failed to read 0x%x, ret=%d\n",
-                                       reg, ret);
+                       dev_err(component->dev, "%s: Failed to read 0x%x, ret=%d\n",
+                               __func__, reg, ret);
                        return ret;
                }
                ucontrol->value.integer.value[i + 1] = (val_r >> gain_sft) / 10;
@@ -834,15 +834,15 @@ static int rt715_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
                        0xaf00);
                break;
        default:
-               dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+               dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
                return -EINVAL;
        }
 
        retval = sdw_stream_add_slave(rt715->slave, &stream_config,
                                        &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(component->dev, "Unable to configure port, retval:%d\n",
-                       retval);
+               dev_err(component->dev, "%s: Unable to configure port, retval:%d\n",
+                       __func__, retval);
                return retval;
        }
 
@@ -893,8 +893,8 @@ static int rt715_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
                val = 0xf;
                break;
        default:
-               dev_err(component->dev, "Unsupported sample rate %d\n",
-                       params_rate(params));
+               dev_err(component->dev, "%s: Unsupported sample rate %d\n",
+                       __func__, params_rate(params));
                return -EINVAL;
        }
 
index 21f37babd148a487e82568144a791bff07fdf6c0..7e13868ff99f03110c165dcd706cff46a8eeba5d 100644 (file)
@@ -482,7 +482,7 @@ static int rt715_bus_config(struct sdw_slave *slave,
 
        ret = rt715_clock_config(&slave->dev);
        if (ret < 0)
-               dev_err(&slave->dev, "Invalid clk config");
+               dev_err(&slave->dev, "%s: Invalid clk config", __func__);
 
        return 0;
 }
@@ -554,7 +554,7 @@ static int __maybe_unused rt715_dev_resume(struct device *dev)
        time = wait_for_completion_timeout(&slave->initialization_complete,
                                           msecs_to_jiffies(RT715_PROBE_TIMEOUT));
        if (!time) {
-               dev_err(&slave->dev, "Initialization not complete, timed out\n");
+               dev_err(&slave->dev, "%s: Initialization not complete, timed out\n", __func__);
                sdw_show_ping_status(slave->bus, true);
 
                return -ETIMEDOUT;
index 9f732a5abd53f37cd24382522f9dc3ab97ecd7b0..299c9b12377c6ada95a40b4a876a43dd127786be 100644 (file)
@@ -40,8 +40,8 @@ static int rt715_index_write(struct regmap *regmap, unsigned int reg,
 
        ret = regmap_write(regmap, addr, value);
        if (ret < 0) {
-               pr_err("Failed to set private value: %08x <= %04x %d\n",
-                      addr, value, ret);
+               pr_err("%s: Failed to set private value: %08x <= %04x %d\n",
+                      __func__, addr, value, ret);
        }
 
        return ret;
@@ -55,8 +55,8 @@ static int rt715_index_write_nid(struct regmap *regmap,
 
        ret = regmap_write(regmap, addr, value);
        if (ret < 0)
-               pr_err("Failed to set private value: %06x <= %04x ret=%d\n",
-                       addr, value, ret);
+               pr_err("%s: Failed to set private value: %06x <= %04x ret=%d\n",
+                      __func__, addr, value, ret);
 
        return ret;
 }
@@ -70,8 +70,8 @@ static int rt715_index_read_nid(struct regmap *regmap,
        *value = 0;
        ret = regmap_read(regmap, addr, value);
        if (ret < 0)
-               pr_err("Failed to get private value: %06x => %04x ret=%d\n",
-                       addr, *value, ret);
+               pr_err("%s: Failed to get private value: %06x => %04x ret=%d\n",
+                      __func__, addr, *value, ret);
 
        return ret;
 }
@@ -862,14 +862,14 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream,
                rt715_index_write(rt715->regmap, RT715_SDW_INPUT_SEL, 0xa000);
                break;
        default:
-               dev_err(component->dev, "Invalid DAI id %d\n", dai->id);
+               dev_err(component->dev, "%s: Invalid DAI id %d\n", __func__, dai->id);
                return -EINVAL;
        }
 
        retval = sdw_stream_add_slave(rt715->slave, &stream_config,
                                        &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
@@ -883,8 +883,8 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream,
                val |= 0x0 << 8;
                break;
        default:
-               dev_err(component->dev, "Unsupported sample rate %d\n",
-                       params_rate(params));
+               dev_err(component->dev, "%s: Unsupported sample rate %d\n",
+                       __func__, params_rate(params));
                return -EINVAL;
        }
 
@@ -892,8 +892,8 @@ static int rt715_pcm_hw_params(struct snd_pcm_substream *substream,
                /* bit 3:0 Number of Channel */
                val |= (params_channels(params) - 1);
        } else {
-               dev_err(component->dev, "Unsupported channels %d\n",
-                       params_channels(params));
+               dev_err(component->dev, "%s: Unsupported channels %d\n",
+                       __func__, params_channels(params));
                return -EINVAL;
        }
 
index eb76f4c675b67fd59df00cb41a17955c751bbd44..65d584c1886e819597577ed10551d2fe5d104e53 100644 (file)
@@ -467,13 +467,13 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev)
                return 0;
 
        if (!slave->unattach_request) {
+               mutex_lock(&rt722->disable_irq_lock);
                if (rt722->disable_irq == true) {
-                       mutex_lock(&rt722->disable_irq_lock);
                        sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6);
                        sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
                        rt722->disable_irq = false;
-                       mutex_unlock(&rt722->disable_irq_lock);
                }
+               mutex_unlock(&rt722->disable_irq_lock);
                goto regmap_sync;
        }
 
index 0e1c65a20392addb92a6bdbc39319884f4d2f9c9..e0ea3a23f7cc6844691338ff8daae7f2843d2c6e 100644 (file)
@@ -35,8 +35,8 @@ int rt722_sdca_index_write(struct rt722_sdca_priv *rt722,
        ret = regmap_write(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt722->slave->dev,
-                       "Failed to set private value: %06x <= %04x ret=%d\n",
-                       addr, value, ret);
+                       "%s: Failed to set private value: %06x <= %04x ret=%d\n",
+                       __func__, addr, value, ret);
 
        return ret;
 }
@@ -51,8 +51,8 @@ int rt722_sdca_index_read(struct rt722_sdca_priv *rt722,
        ret = regmap_read(regmap, addr, value);
        if (ret < 0)
                dev_err(&rt722->slave->dev,
-                       "Failed to get private value: %06x => %04x ret=%d\n",
-                       addr, *value, ret);
+                       "%s: Failed to get private value: %06x => %04x ret=%d\n",
+                       __func__, addr, *value, ret);
 
        return ret;
 }
@@ -663,7 +663,8 @@ static int rt722_sdca_dmic_set_gain_put(struct snd_kcontrol *kcontrol,
        for (i = 0; i < p->count; i++) {
                err = regmap_write(rt722->mbq_regmap, p->reg_base + i, gain_val[i]);
                if (err < 0)
-                       dev_err(&rt722->slave->dev, "%#08x can't be set\n", p->reg_base + i);
+                       dev_err(&rt722->slave->dev, "%s: %#08x can't be set\n",
+                               __func__, p->reg_base + i);
        }
 
        return changed;
@@ -1211,13 +1212,13 @@ static int rt722_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
        retval = sdw_stream_add_slave(rt722->slave, &stream_config,
                                        &port_config, 1, sdw_stream);
        if (retval) {
-               dev_err(dai->dev, "Unable to configure port\n");
+               dev_err(dai->dev, "%s: Unable to configure port\n", __func__);
                return retval;
        }
 
        if (params_channels(params) > 16) {
-               dev_err(component->dev, "Unsupported channels %d\n",
-                       params_channels(params));
+               dev_err(component->dev, "%s: Unsupported channels %d\n",
+                       __func__, params_channels(params));
                return -EINVAL;
        }
 
@@ -1236,8 +1237,8 @@ static int rt722_sdca_pcm_hw_params(struct snd_pcm_substream *substream,
                sampling_rate = RT722_SDCA_RATE_192000HZ;
                break;
        default:
-               dev_err(component->dev, "Rate %d is not supported\n",
-                       params_rate(params));
+               dev_err(component->dev, "%s: Rate %d is not supported\n",
+                       __func__, params_rate(params));
                return -EINVAL;
        }
 
index e451c009f2d99980bab20dd5d4c55cc26bd73cd5..7d5c096e06cd32b77fc6b73f18002af63bd6c8d5 100644 (file)
@@ -683,11 +683,12 @@ static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl)
 int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type,
                      unsigned int alg, void *buf, size_t len)
 {
-       struct cs_dsp_coeff_ctl *cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
+       struct cs_dsp_coeff_ctl *cs_ctl;
        struct wm_coeff_ctl *ctl;
        int ret;
 
        mutex_lock(&dsp->cs_dsp.pwr_lock);
+       cs_ctl = cs_dsp_get_ctl(&dsp->cs_dsp, name, type, alg);
        ret = cs_dsp_coeff_write_ctrl(cs_ctl, 0, buf, len);
        mutex_unlock(&dsp->cs_dsp.pwr_lock);
 
index c018f84fe02529322455035e6ca4fff7ddf2afaf..fc072dc58968cb80d6481cb0c84a4b776bdef150 100644 (file)
@@ -296,5 +296,6 @@ static struct platform_driver avs_da7219_driver = {
 
 module_platform_driver(avs_da7219_driver);
 
+MODULE_DESCRIPTION("Intel da7219 machine driver");
 MODULE_AUTHOR("Cezary Rojewski <cezary.rojewski@intel.com>");
 MODULE_LICENSE("GPL");
index ba2bc7f689eb603051870bfcbf28dc5640b7ac66..d9e5e85f523358d26a85218c5050fe8b31876e21 100644 (file)
@@ -96,4 +96,5 @@ static struct platform_driver avs_dmic_driver = {
 
 module_platform_driver(avs_dmic_driver);
 
+MODULE_DESCRIPTION("Intel DMIC machine driver");
 MODULE_LICENSE("GPL");
index 1090082e7d5bfcd47e92ecbd6bed22269fab3678..5c90a60075773409431f957f05f2e3bc03303334 100644 (file)
@@ -326,4 +326,5 @@ static struct platform_driver avs_es8336_driver = {
 
 module_platform_driver(avs_es8336_driver);
 
+MODULE_DESCRIPTION("Intel es8336 machine driver");
 MODULE_LICENSE("GPL");
index 28f254eb0d03fcfa6f5fc8c4bd0184d73f9c298d..027373d6a16d602c62b07d34b2b0bd984c14ae78 100644 (file)
@@ -204,4 +204,5 @@ static struct platform_driver avs_i2s_test_driver = {
 
 module_platform_driver(avs_i2s_test_driver);
 
+MODULE_DESCRIPTION("Intel i2s test machine driver");
 MODULE_LICENSE("GPL");
index a83b95f25129f90e1a0fd6f5d34e7e6fa799d34f..1ff85e4d8e160b7c61a74fc6dbdf9a32ee410614 100644 (file)
@@ -154,4 +154,5 @@ static struct platform_driver avs_max98357a_driver = {
 
 module_platform_driver(avs_max98357a_driver)
 
+MODULE_DESCRIPTION("Intel max98357a machine driver");
 MODULE_LICENSE("GPL");
index 3b980a025e6f697446419f6efec4f071e45495cb..8d31586b73eaec7c10edb319002f1b024e41480b 100644 (file)
@@ -211,4 +211,5 @@ static struct platform_driver avs_max98373_driver = {
 
 module_platform_driver(avs_max98373_driver)
 
+MODULE_DESCRIPTION("Intel max98373 machine driver");
 MODULE_LICENSE("GPL");
index 86dd2b228df3a5ce1a2751221659834cb458c775..572ec58073d06bce6e07bc7dbd05f2b9a3cf5462 100644 (file)
@@ -208,4 +208,5 @@ static struct platform_driver avs_max98927_driver = {
 
 module_platform_driver(avs_max98927_driver)
 
+MODULE_DESCRIPTION("Intel max98927 machine driver");
 MODULE_LICENSE("GPL");
index 1c1e2083f474df122259c41f774e246dd7223a1f..55db75efae41425684bdd1c67441e76cae4d4062 100644 (file)
@@ -313,4 +313,5 @@ static struct platform_driver avs_nau8825_driver = {
 
 module_platform_driver(avs_nau8825_driver)
 
+MODULE_DESCRIPTION("Intel nau8825 machine driver");
 MODULE_LICENSE("GPL");
index a9469b5ecb402f1af389c52fd6e1e5d9991cc3e9..8be6887bbc6e81cb0f6af16685524fd01b96e36a 100644 (file)
@@ -69,4 +69,5 @@ static struct platform_driver avs_probe_mb_driver = {
 
 module_platform_driver(avs_probe_mb_driver);
 
+MODULE_DESCRIPTION("Intel probe machine driver");
 MODULE_LICENSE("GPL");
index bfcb8845fd15d06ec39d7360008ac60f73491d3b..1cf52421608753e1cca23d333eab4a7a9d624b63 100644 (file)
@@ -276,4 +276,5 @@ static struct platform_driver avs_rt274_driver = {
 
 module_platform_driver(avs_rt274_driver);
 
+MODULE_DESCRIPTION("Intel rt274 machine driver");
 MODULE_LICENSE("GPL");
index 28d7d86b1cc99dabed8c76a94ee2c5dc3064582e..4740bba1057032128c60594b9339b820f9f7bc70 100644 (file)
@@ -247,4 +247,5 @@ static struct platform_driver avs_rt286_driver = {
 
 module_platform_driver(avs_rt286_driver);
 
+MODULE_DESCRIPTION("Intel rt286 machine driver");
 MODULE_LICENSE("GPL");
index 80f490b9e11842c34859ff8b6cebc8b7cee51e71..6e409e29f6974654a0a5cbe4eba105f78c055164 100644 (file)
@@ -266,4 +266,5 @@ static struct platform_driver avs_rt298_driver = {
 
 module_platform_driver(avs_rt298_driver);
 
+MODULE_DESCRIPTION("Intel rt298 machine driver");
 MODULE_LICENSE("GPL");
index 60105f453ae235c6affdcfef2181b95b101a3e1c..097ae5f73241efea14cf187e85bc88d936be1956 100644 (file)
@@ -192,4 +192,5 @@ static struct platform_driver avs_rt5514_driver = {
 
 module_platform_driver(avs_rt5514_driver);
 
+MODULE_DESCRIPTION("Intel rt5514 machine driver");
 MODULE_LICENSE("GPL");
index b4762c2a7bf2d1a3b0237479145380243861c4d8..1880c315cc4d1f9be4b7f42113382b322bd9c4cd 100644 (file)
@@ -265,4 +265,5 @@ static struct platform_driver avs_rt5663_driver = {
 
 module_platform_driver(avs_rt5663_driver);
 
+MODULE_DESCRIPTION("Intel rt5663 machine driver");
 MODULE_LICENSE("GPL");
index 243f979fda98a4d6e67b76cef8b5a192dd6a8ff7..594a971ded9eb2ea339ab2e45ae84da8b4b1dd6d 100644 (file)
@@ -341,5 +341,6 @@ static struct platform_driver avs_rt5682_driver = {
 
 module_platform_driver(avs_rt5682_driver)
 
+MODULE_DESCRIPTION("Intel rt5682 machine driver");
 MODULE_AUTHOR("Cezary Rojewski <cezary.rojewski@intel.com>");
 MODULE_LICENSE("GPL");
index 4a0e136835ff5d05118b1d802d2884beabb68a95..d6f7f046c24e5d12bd3189fe800bb05b18ee4444 100644 (file)
@@ -200,4 +200,5 @@ static struct platform_driver avs_ssm4567_driver = {
 
 module_platform_driver(avs_ssm4567_driver)
 
+MODULE_DESCRIPTION("Intel ssm4567 machine driver");
 MODULE_LICENSE("GPL");
index 2d25748ca70662bf771c6896297ccb6a0fb0798f..b27e89ff6a1673f57db6e253a818d6fbe3d1ab91 100644 (file)
@@ -263,7 +263,7 @@ int snd_soc_get_volsw(struct snd_kcontrol *kcontrol,
        int max = mc->max;
        int min = mc->min;
        int sign_bit = mc->sign_bit;
-       unsigned int mask = (1 << fls(max)) - 1;
+       unsigned int mask = (1ULL << fls(max)) - 1;
        unsigned int invert = mc->invert;
        int val;
        int ret;
index be7dc1e02284ab62f8cbaeffdd70f26a19ff6232..c12c7f820529476de0273474082b8174ab0ae052 100644 (file)
@@ -704,6 +704,10 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
                goto unregister_dev;
        }
 
+       ret = acp_init(sdev);
+       if (ret < 0)
+               goto free_smn_dev;
+
        sdev->ipc_irq = pci->irq;
        ret = request_threaded_irq(sdev->ipc_irq, acp_irq_handler, acp_irq_thread,
                                   IRQF_SHARED, "AudioDSP", sdev);
@@ -713,10 +717,6 @@ int amd_sof_acp_probe(struct snd_sof_dev *sdev)
                goto free_smn_dev;
        }
 
-       ret = acp_init(sdev);
-       if (ret < 0)
-               goto free_ipc_irq;
-
        /* scan SoundWire capabilities exposed by DSDT */
        ret = acp_sof_scan_sdw_devices(sdev, chip->sdw_acpi_dev_addr);
        if (ret < 0) {
index 9b00ede2a486a2ff2d619ab714ed2c1665eb3463..cc84d4c81be9d363d701b1d5c658e26a62079435 100644 (file)
@@ -339,8 +339,7 @@ static int sof_init_environment(struct snd_sof_dev *sdev)
        ret = snd_sof_probe(sdev);
        if (ret < 0) {
                dev_err(sdev->dev, "failed to probe DSP %d\n", ret);
-               sof_ops_free(sdev);
-               return ret;
+               goto err_sof_probe;
        }
 
        /* check machine info */
@@ -358,15 +357,18 @@ static int sof_init_environment(struct snd_sof_dev *sdev)
                ret = validate_sof_ops(sdev);
                if (ret < 0) {
                        snd_sof_remove(sdev);
+                       snd_sof_remove_late(sdev);
                        return ret;
                }
        }
 
+       return 0;
+
 err_machine_check:
-       if (ret) {
-               snd_sof_remove(sdev);
-               sof_ops_free(sdev);
-       }
+       snd_sof_remove(sdev);
+err_sof_probe:
+       snd_sof_remove_late(sdev);
+       sof_ops_free(sdev);
 
        return ret;
 }
index 2b385cddc385c5bd59e11acfe8e6bda45704fdd1..d71bb66b9991164cdb8b0ed000e461d9e3a0719c 100644 (file)
@@ -57,6 +57,9 @@ struct snd_sof_dsp_ops sof_hda_common_ops = {
        .pcm_pointer    = hda_dsp_pcm_pointer,
        .pcm_ack        = hda_dsp_pcm_ack,
 
+       .get_dai_frame_counter = hda_dsp_get_stream_llp,
+       .get_host_byte_counter = hda_dsp_get_stream_ldp,
+
        /* firmware loading */
        .load_firmware = snd_sof_load_firmware_raw,
 
index c50ca9e72d37385816ddb3cd6ef7456ed50a58e9..b073720b4cf432466e18bf8840dd87eb5efac98e 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <sound/pcm_params.h>
 #include <sound/hdaudio_ext.h>
+#include <sound/hda_register.h>
 #include <sound/hda-mlink.h>
 #include <sound/sof/ipc4/header.h>
 #include <uapi/sound/sof/header.h>
@@ -362,6 +363,16 @@ static int hda_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *cpu_dai,
        case SNDRV_PCM_TRIGGER_STOP:
        case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
                snd_hdac_ext_stream_clear(hext_stream);
+
+               /*
+                * Save the LLP registers in case the stream is
+                * restarting due PAUSE_RELEASE, or START without a pcm
+                * close/open since in this case the LLP register is not reset
+                * to 0 and the delay calculation will return with invalid
+                * results.
+                */
+               hext_stream->pplcllpl = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL);
+               hext_stream->pplcllpu = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU);
                break;
        default:
                dev_err(sdev->dev, "unknown trigger command %d\n", cmd);
index 31ffa1a8f2ac04ddd5c31aadec5400c52757dd19..ef5c915db8ffb47a622a8c753f14dd950fb9c45c 100644 (file)
@@ -681,17 +681,27 @@ static int hda_suspend(struct snd_sof_dev *sdev, bool runtime_suspend)
        struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata;
        const struct sof_intel_dsp_desc *chip = hda->desc;
        struct hdac_bus *bus = sof_to_bus(sdev);
+       bool imr_lost = false;
        int ret, j;
 
        /*
-        * The memory used for IMR boot loses its content in deeper than S3 state
-        * We must not try IMR boot on next power up (as it will fail).
-        *
+        * The memory used for IMR boot loses its content in deeper than S3
+        * state on CAVS platforms.
+        * On ACE platforms due to the system architecture the IMR content is
+        * lost at S3 state already, they are tailored for s2idle use.
+        * We must not try IMR boot on next power up in these cases as it will
+        * fail.
+        */
+       if (sdev->system_suspend_target > SOF_SUSPEND_S3 ||
+           (chip->hw_ip_version >= SOF_INTEL_ACE_1_0 &&
+            sdev->system_suspend_target == SOF_SUSPEND_S3))
+               imr_lost = true;
+
+       /*
         * In case of firmware crash or boot failure set the skip_imr_boot to true
         * as well in order to try to re-load the firmware to do a 'cold' boot.
         */
-       if (sdev->system_suspend_target > SOF_SUSPEND_S3 ||
-           sdev->fw_state == SOF_FW_CRASHED ||
+       if (imr_lost || sdev->fw_state == SOF_FW_CRASHED ||
            sdev->fw_state == SOF_FW_BOOT_FAILED)
                hda->skip_imr_boot = true;
 
index 18f07364d2198425bffd3e111a546e11b536cd63..d7b446f3f973e3d532d8eaef241aac2f3a30a54d 100644 (file)
@@ -259,8 +259,37 @@ int hda_dsp_pcm_open(struct snd_sof_dev *sdev,
                snd_pcm_hw_constraint_mask64(substream->runtime, SNDRV_PCM_HW_PARAM_FORMAT,
                                             SNDRV_PCM_FMTBIT_S16 | SNDRV_PCM_FMTBIT_S32);
 
+       /*
+        * The dsp_max_burst_size_in_ms is the length of the maximum burst size
+        * of the host DMA in the ALSA buffer.
+        *
+        * On playback start the DMA will transfer dsp_max_burst_size_in_ms
+        * amount of data in one initial burst to fill up the host DMA buffer.
+        * Consequent DMA burst sizes are shorter and their length can vary.
+        * To make sure that userspace allocate large enough ALSA buffer we need
+        * to place a constraint on the buffer time.
+        *
+        * On capture the DMA will transfer 1ms chunks.
+        *
+        * Exact dsp_max_burst_size_in_ms constraint is racy, so set the
+        * constraint to a minimum of 2x dsp_max_burst_size_in_ms.
+        */
+       if (spcm->stream[direction].dsp_max_burst_size_in_ms)
+               snd_pcm_hw_constraint_minmax(substream->runtime,
+                       SNDRV_PCM_HW_PARAM_BUFFER_TIME,
+                       spcm->stream[direction].dsp_max_burst_size_in_ms * USEC_PER_MSEC * 2,
+                       UINT_MAX);
+
        /* binding pcm substream to hda stream */
        substream->runtime->private_data = &dsp_stream->hstream;
+
+       /*
+        * Reset the llp cache values (they are used for LLP compensation in
+        * case the counter is not reset)
+        */
+       dsp_stream->pplcllpl = 0;
+       dsp_stream->pplcllpu = 0;
+
        return 0;
 }
 
index b387b1a69d7ea3ceaed9fe814b174d9040e3eae1..0c189d3b19c1af6448d5d1264802ef493e5c7b14 100644 (file)
@@ -1063,3 +1063,73 @@ snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream,
 
        return pos;
 }
+
+#define merge_u64(u32_u, u32_l) (((u64)(u32_u) << 32) | (u32_l))
+
+/**
+ * hda_dsp_get_stream_llp - Retrieve the LLP (Linear Link Position) of the stream
+ * @sdev: SOF device
+ * @component: ASoC component
+ * @substream: PCM substream
+ *
+ * Returns the raw Linear Link Position value
+ */
+u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev,
+                          struct snd_soc_component *component,
+                          struct snd_pcm_substream *substream)
+{
+       struct hdac_stream *hstream = substream->runtime->private_data;
+       struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream);
+       u32 llp_l, llp_u;
+
+       /*
+        * The pplc_addr have been calculated during probe in
+        * hda_dsp_stream_init():
+        * pplc_addr = sdev->bar[HDA_DSP_PP_BAR] +
+        *             SOF_HDA_PPLC_BASE +
+        *             SOF_HDA_PPLC_MULTI * total_stream +
+        *             SOF_HDA_PPLC_INTERVAL * stream_index
+        *
+        * Use this pre-calculated address to avoid repeated re-calculation.
+        */
+       llp_l = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPL);
+       llp_u = readl(hext_stream->pplc_addr + AZX_REG_PPLCLLPU);
+
+       /* Compensate the LLP counter with the saved offset */
+       if (hext_stream->pplcllpl || hext_stream->pplcllpu)
+               return merge_u64(llp_u, llp_l) -
+                      merge_u64(hext_stream->pplcllpu, hext_stream->pplcllpl);
+
+       return merge_u64(llp_u, llp_l);
+}
+
+/**
+ * hda_dsp_get_stream_ldp - Retrieve the LDP (Linear DMA Position) of the stream
+ * @sdev: SOF device
+ * @component: ASoC component
+ * @substream: PCM substream
+ *
+ * Returns the raw Linear Link Position value
+ */
+u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev,
+                          struct snd_soc_component *component,
+                          struct snd_pcm_substream *substream)
+{
+       struct hdac_stream *hstream = substream->runtime->private_data;
+       struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream);
+       u32 ldp_l, ldp_u;
+
+       /*
+        * The pphc_addr have been calculated during probe in
+        * hda_dsp_stream_init():
+        * pphc_addr = sdev->bar[HDA_DSP_PP_BAR] +
+        *             SOF_HDA_PPHC_BASE +
+        *             SOF_HDA_PPHC_INTERVAL * stream_index
+        *
+        * Use this pre-calculated address to avoid repeated re-calculation.
+        */
+       ldp_l = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPL);
+       ldp_u = readl(hext_stream->pphc_addr + AZX_REG_PPHCLDPU);
+
+       return ((u64)ldp_u << 32) | ldp_l;
+}
index b36eb7c7891335a3038d5e1402d6f73ede754b81..81a1d4606d3cde8ecb1b9b2ef859c7b0393555f5 100644 (file)
@@ -662,6 +662,12 @@ bool hda_dsp_check_stream_irq(struct snd_sof_dev *sdev);
 
 snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream,
                                              int direction, bool can_sleep);
+u64 hda_dsp_get_stream_llp(struct snd_sof_dev *sdev,
+                          struct snd_soc_component *component,
+                          struct snd_pcm_substream *substream);
+u64 hda_dsp_get_stream_ldp(struct snd_sof_dev *sdev,
+                          struct snd_soc_component *component,
+                          struct snd_pcm_substream *substream);
 
 struct hdac_ext_stream *
        hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction, u32 flags);
index 7ae017a00184e52371052c188d75f13fbfc053df..aeb4350cce6bba3b229af876e188c4ead7f2b201 100644 (file)
@@ -29,15 +29,17 @@ static const struct snd_sof_debugfs_map lnl_dsp_debugfs[] = {
 };
 
 /* this helps allows the DSP to setup DMIC/SSP */
-static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus)
+static int hdac_bus_offload_dmic_ssp(struct hdac_bus *bus, bool enable)
 {
        int ret;
 
-       ret = hdac_bus_eml_enable_offload(bus, true,  AZX_REG_ML_LEPTR_ID_INTEL_SSP, true);
+       ret = hdac_bus_eml_enable_offload(bus, true,
+                                         AZX_REG_ML_LEPTR_ID_INTEL_SSP, enable);
        if (ret < 0)
                return ret;
 
-       ret = hdac_bus_eml_enable_offload(bus, true,  AZX_REG_ML_LEPTR_ID_INTEL_DMIC, true);
+       ret = hdac_bus_eml_enable_offload(bus, true,
+                                         AZX_REG_ML_LEPTR_ID_INTEL_DMIC, enable);
        if (ret < 0)
                return ret;
 
@@ -52,7 +54,19 @@ static int lnl_hda_dsp_probe(struct snd_sof_dev *sdev)
        if (ret < 0)
                return ret;
 
-       return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+       return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
+}
+
+static void lnl_hda_dsp_remove(struct snd_sof_dev *sdev)
+{
+       int ret;
+
+       ret = hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), false);
+       if (ret < 0)
+               dev_warn(sdev->dev,
+                        "Failed to disable offload for DMIC/SSP: %d\n", ret);
+
+       hda_dsp_remove(sdev);
 }
 
 static int lnl_hda_dsp_resume(struct snd_sof_dev *sdev)
@@ -63,7 +77,7 @@ static int lnl_hda_dsp_resume(struct snd_sof_dev *sdev)
        if (ret < 0)
                return ret;
 
-       return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+       return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
 }
 
 static int lnl_hda_dsp_runtime_resume(struct snd_sof_dev *sdev)
@@ -74,7 +88,7 @@ static int lnl_hda_dsp_runtime_resume(struct snd_sof_dev *sdev)
        if (ret < 0)
                return ret;
 
-       return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev));
+       return hdac_bus_offload_dmic_ssp(sof_to_bus(sdev), true);
 }
 
 static int lnl_dsp_post_fw_run(struct snd_sof_dev *sdev)
@@ -97,9 +111,11 @@ int sof_lnl_ops_init(struct snd_sof_dev *sdev)
        /* common defaults */
        memcpy(&sof_lnl_ops, &sof_hda_common_ops, sizeof(struct snd_sof_dsp_ops));
 
-       /* probe */
-       if (!sdev->dspless_mode_selected)
+       /* probe/remove */
+       if (!sdev->dspless_mode_selected) {
                sof_lnl_ops.probe = lnl_hda_dsp_probe;
+               sof_lnl_ops.remove = lnl_hda_dsp_remove;
+       }
 
        /* shutdown */
        sof_lnl_ops.shutdown = hda_dsp_shutdown;
@@ -134,8 +150,6 @@ int sof_lnl_ops_init(struct snd_sof_dev *sdev)
                sof_lnl_ops.runtime_resume = lnl_hda_dsp_runtime_resume;
        }
 
-       sof_lnl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position;
-
        /* dsp core get/put */
        sof_lnl_ops.core_get = mtl_dsp_core_get;
        sof_lnl_ops.core_put = mtl_dsp_core_put;
index df05dc77b8d5e3bef5f3e55ea7e82837b5a89504..060c34988e90d122caf12cc30fe42ba5f1d0c87d 100644 (file)
@@ -626,18 +626,6 @@ static int mtl_dsp_disable_interrupts(struct snd_sof_dev *sdev)
        return mtl_enable_interrupts(sdev, false);
 }
 
-u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev,
-                                        struct snd_soc_component *component,
-                                        struct snd_pcm_substream *substream)
-{
-       struct hdac_stream *hstream = substream->runtime->private_data;
-       u32 llp_l, llp_u;
-
-       llp_l = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPL(hstream->index));
-       llp_u = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR, MTL_PPLCLLPU(hstream->index));
-       return ((u64)llp_u << 32) | llp_l;
-}
-
 int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core)
 {
        const struct sof_ipc_pm_ops *pm_ops = sdev->ipc->ops->pm;
@@ -707,8 +695,6 @@ int sof_mtl_ops_init(struct snd_sof_dev *sdev)
        sof_mtl_ops.core_get = mtl_dsp_core_get;
        sof_mtl_ops.core_put = mtl_dsp_core_put;
 
-       sof_mtl_ops.get_stream_position = mtl_dsp_get_stream_hda_link_position;
-
        sdev->private = kzalloc(sizeof(struct sof_ipc4_fw_data), GFP_KERNEL);
        if (!sdev->private)
                return -ENOMEM;
index cc5a1f46fd09560e9fefc10d6b4775b82294bfd4..ea8c1b83f7127d58f76bd5db018eeb0f0d9b1d7f 100644 (file)
@@ -6,12 +6,6 @@
  * Copyright(c) 2020-2022 Intel Corporation. All rights reserved.
  */
 
-/* HDA Registers */
-#define MTL_PPLCLLPL_BASE              0x948
-#define MTL_PPLCLLPU_STRIDE            0x10
-#define MTL_PPLCLLPL(x)                        (MTL_PPLCLLPL_BASE + (x) * MTL_PPLCLLPU_STRIDE)
-#define MTL_PPLCLLPU(x)                        (MTL_PPLCLLPL_BASE + 0x4 + (x) * MTL_PPLCLLPU_STRIDE)
-
 /* DSP Registers */
 #define MTL_HFDSSCS                    0x1000
 #define MTL_HFDSSCS_SPA_MASK           BIT(16)
@@ -103,9 +97,5 @@ int mtl_dsp_ipc_get_window_offset(struct snd_sof_dev *sdev, u32 id);
 
 void mtl_ipc_dump(struct snd_sof_dev *sdev);
 
-u64 mtl_dsp_get_stream_hda_link_position(struct snd_sof_dev *sdev,
-                                        struct snd_soc_component *component,
-                                        struct snd_pcm_substream *substream);
-
 int mtl_dsp_core_get(struct snd_sof_dev *sdev, int core);
 int mtl_dsp_core_put(struct snd_sof_dev *sdev, int core);
index 9f1e33ee8826123cdc57bd0da78b876e79bf6f27..0e04bea9432ddab2e60b2f61d209689d560b39fb 100644 (file)
@@ -4,6 +4,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/clock.h>
 #include <sound/sof/ipc4/header.h>
 #include "sof-priv.h"
 #include "ipc4-priv.h"
@@ -412,7 +413,6 @@ static int ipc4_mtrace_enable(struct snd_sof_dev *sdev)
        const struct sof_ipc_ops *iops = sdev->ipc->ops;
        struct sof_ipc4_msg msg;
        u64 system_time;
-       ktime_t kt;
        int ret;
 
        if (priv->mtrace_state != SOF_MTRACE_DISABLED)
@@ -424,9 +424,12 @@ static int ipc4_mtrace_enable(struct snd_sof_dev *sdev)
        msg.primary |= SOF_IPC4_MOD_INSTANCE(SOF_IPC4_MOD_INIT_BASEFW_INSTANCE_ID);
        msg.extension = SOF_IPC4_MOD_EXT_MSG_PARAM_ID(SOF_IPC4_FW_PARAM_SYSTEM_TIME);
 
-       /* The system time is in usec, UTC, epoch is 1601-01-01 00:00:00 */
-       kt = ktime_add_us(ktime_get_real(), FW_EPOCH_DELTA * USEC_PER_SEC);
-       system_time = ktime_to_us(kt);
+       /*
+        * local_clock() is used to align with dmesg, so both kernel and firmware logs have
+        * the same base and a minor delta due to the IPC. system time is in us format but
+        * local_clock() returns the time in ns, so convert to ns.
+        */
+       system_time = div64_u64(local_clock(), NSEC_PER_USEC);
        msg.data_size = sizeof(system_time);
        msg.data_ptr = &system_time;
        ret = iops->set_get_data(sdev, &msg, msg.data_size, true);
index 0f332c8cdbe6afe6fc9449b48194ebb749db5d3d..e915f9f87a6c35d74f1cf7096accca70dce688da 100644 (file)
 #include "ipc4-topology.h"
 #include "ipc4-fw-reg.h"
 
+/**
+ * struct sof_ipc4_timestamp_info - IPC4 timestamp info
+ * @host_copier: the host copier of the pcm stream
+ * @dai_copier: the dai copier of the pcm stream
+ * @stream_start_offset: reported by fw in memory window (converted to frames)
+ * @stream_end_offset: reported by fw in memory window (converted to frames)
+ * @llp_offset: llp offset in memory window
+ * @boundary: wrap boundary should be used for the LLP frame counter
+ * @delay: Calculated and stored in pointer callback. The stored value is
+ *        returned in the delay callback.
+ */
+struct sof_ipc4_timestamp_info {
+       struct sof_ipc4_copier *host_copier;
+       struct sof_ipc4_copier *dai_copier;
+       u64 stream_start_offset;
+       u64 stream_end_offset;
+       u32 llp_offset;
+
+       u64 boundary;
+       snd_pcm_sframes_t delay;
+};
+
 static int sof_ipc4_set_multi_pipeline_state(struct snd_sof_dev *sdev, u32 state,
                                             struct ipc4_pipeline_set_state_data *trigger_list)
 {
@@ -423,8 +445,19 @@ static int sof_ipc4_trigger_pipelines(struct snd_soc_component *component,
        }
 
        /* return if this is the final state */
-       if (state == SOF_IPC4_PIPE_PAUSED)
+       if (state == SOF_IPC4_PIPE_PAUSED) {
+               struct sof_ipc4_timestamp_info *time_info;
+
+               /*
+                * Invalidate the stream_start_offset to make sure that it is
+                * going to be updated if the stream resumes
+                */
+               time_info = spcm->stream[substream->stream].private;
+               if (time_info)
+                       time_info->stream_start_offset = SOF_IPC4_INVALID_STREAM_POSITION;
+
                goto free;
+       }
 skip_pause_transition:
        /* else set the RUNNING/RESET state in the DSP */
        ret = sof_ipc4_set_multi_pipeline_state(sdev, state, trigger_list);
@@ -464,14 +497,12 @@ static int sof_ipc4_pcm_trigger(struct snd_soc_component *component,
 
        /* determine the pipeline state */
        switch (cmd) {
-       case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-               state = SOF_IPC4_PIPE_PAUSED;
-               break;
        case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
        case SNDRV_PCM_TRIGGER_RESUME:
        case SNDRV_PCM_TRIGGER_START:
                state = SOF_IPC4_PIPE_RUNNING;
                break;
+       case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
        case SNDRV_PCM_TRIGGER_SUSPEND:
        case SNDRV_PCM_TRIGGER_STOP:
                state = SOF_IPC4_PIPE_PAUSED;
@@ -703,6 +734,10 @@ static int sof_ipc4_pcm_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm
        if (abi_version < SOF_IPC4_FW_REGS_ABI_VER)
                support_info = false;
 
+       /* For delay reporting the get_host_byte_counter callback is needed */
+       if (!sof_ops(sdev) || !sof_ops(sdev)->get_host_byte_counter)
+               support_info = false;
+
        for_each_pcm_streams(stream) {
                pipeline_list = &spcm->stream[stream].pipeline_list;
 
@@ -835,7 +870,6 @@ static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev,
        struct sof_ipc4_copier *host_copier = time_info->host_copier;
        struct sof_ipc4_copier *dai_copier = time_info->dai_copier;
        struct sof_ipc4_pipeline_registers ppl_reg;
-       u64 stream_start_position;
        u32 dai_sample_size;
        u32 ch, node_index;
        u32 offset;
@@ -852,38 +886,51 @@ static int sof_ipc4_get_stream_start_offset(struct snd_sof_dev *sdev,
        if (ppl_reg.stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION)
                return -EINVAL;
 
-       stream_start_position = ppl_reg.stream_start_offset;
        ch = dai_copier->data.out_format.fmt_cfg;
        ch = SOF_IPC4_AUDIO_FORMAT_CFG_CHANNELS_COUNT(ch);
        dai_sample_size = (dai_copier->data.out_format.bit_depth >> 3) * ch;
-       /* convert offset to sample count */
-       do_div(stream_start_position, dai_sample_size);
-       time_info->stream_start_offset = stream_start_position;
+
+       /* convert offsets to frame count */
+       time_info->stream_start_offset = ppl_reg.stream_start_offset;
+       do_div(time_info->stream_start_offset, dai_sample_size);
+       time_info->stream_end_offset = ppl_reg.stream_end_offset;
+       do_div(time_info->stream_end_offset, dai_sample_size);
+
+       /*
+        * Calculate the wrap boundary need to be used for delay calculation
+        * The host counter is in bytes, it will wrap earlier than the frames
+        * based link counter.
+        */
+       time_info->boundary = div64_u64(~((u64)0),
+                                       frames_to_bytes(substream->runtime, 1));
+       /* Initialize the delay value to 0 (no delay) */
+       time_info->delay = 0;
 
        return 0;
 }
 
-static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
-                                           struct snd_pcm_substream *substream)
+static int sof_ipc4_pcm_pointer(struct snd_soc_component *component,
+                               struct snd_pcm_substream *substream,
+                               snd_pcm_uframes_t *pointer)
 {
        struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
        struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
        struct sof_ipc4_timestamp_info *time_info;
        struct sof_ipc4_llp_reading_slot llp;
-       snd_pcm_uframes_t head_ptr, tail_ptr;
+       snd_pcm_uframes_t head_cnt, tail_cnt;
        struct snd_sof_pcm_stream *stream;
+       u64 dai_cnt, host_cnt, host_ptr;
        struct snd_sof_pcm *spcm;
-       u64 tmp_ptr;
        int ret;
 
        spcm = snd_sof_find_spcm_dai(component, rtd);
        if (!spcm)
-               return 0;
+               return -EOPNOTSUPP;
 
        stream = &spcm->stream[substream->stream];
        time_info = stream->private;
        if (!time_info)
-               return 0;
+               return -EOPNOTSUPP;
 
        /*
         * stream_start_offset is updated to memory window by FW based on
@@ -893,45 +940,116 @@ static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
        if (time_info->stream_start_offset == SOF_IPC4_INVALID_STREAM_POSITION) {
                ret = sof_ipc4_get_stream_start_offset(sdev, substream, stream, time_info);
                if (ret < 0)
-                       return 0;
+                       return -EOPNOTSUPP;
        }
 
+       /* For delay calculation we need the host counter */
+       host_cnt = snd_sof_pcm_get_host_byte_counter(sdev, component, substream);
+       host_ptr = host_cnt;
+
+       /* convert the host_cnt to frames */
+       host_cnt = div64_u64(host_cnt, frames_to_bytes(substream->runtime, 1));
+
        /*
-        * HDaudio links don't support the LLP counter reported by firmware
-        * the link position is read directly from hardware registers.
+        * If the LLP counter is not reported by firmware in the SRAM window
+        * then read the dai (link) counter via host accessible means if
+        * available.
         */
        if (!time_info->llp_offset) {
-               tmp_ptr = snd_sof_pcm_get_stream_position(sdev, component, substream);
-               if (!tmp_ptr)
-                       return 0;
+               dai_cnt = snd_sof_pcm_get_dai_frame_counter(sdev, component, substream);
+               if (!dai_cnt)
+                       return -EOPNOTSUPP;
        } else {
                sof_mailbox_read(sdev, time_info->llp_offset, &llp, sizeof(llp));
-               tmp_ptr = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l;
+               dai_cnt = ((u64)llp.reading.llp_u << 32) | llp.reading.llp_l;
        }
+       dai_cnt += time_info->stream_end_offset;
 
-       /* In two cases dai dma position is not accurate
+       /* In two cases dai dma counter is not accurate
         * (1) dai pipeline is started before host pipeline
-        * (2) multiple streams mixed into one. Each stream has the same dai dma position
+        * (2) multiple streams mixed into one. Each stream has the same dai dma
+        *     counter
         *
-        * Firmware calculates correct stream_start_offset for all cases including above two.
-        * Driver subtracts stream_start_offset from dai dma position to get accurate one
+        * Firmware calculates correct stream_start_offset for all cases
+        * including above two.
+        * Driver subtracts stream_start_offset from dai dma counter to get
+        * accurate one
         */
-       tmp_ptr -= time_info->stream_start_offset;
 
-       /* Calculate the delay taking into account that both pointer can wrap */
-       div64_u64_rem(tmp_ptr, substream->runtime->boundary, &tmp_ptr);
+       /*
+        * On stream start the dai counter might not yet have reached the
+        * stream_start_offset value which means that no frames have left the
+        * DSP yet from the audio stream (on playback, capture streams have
+        * offset of 0 as we start capturing right away).
+        * In this case we need to adjust the distance between the counters by
+        * increasing the host counter by (offset - dai_counter).
+        * Otherwise the dai_counter needs to be adjusted to reflect the number
+        * of valid frames passed on the DAI side.
+        *
+        * The delay is the difference between the counters on the two
+        * sides of the DSP.
+        */
+       if (dai_cnt < time_info->stream_start_offset) {
+               host_cnt += time_info->stream_start_offset - dai_cnt;
+               dai_cnt = 0;
+       } else {
+               dai_cnt -= time_info->stream_start_offset;
+       }
+
+       /* Wrap the dai counter at the boundary where the host counter wraps */
+       div64_u64_rem(dai_cnt, time_info->boundary, &dai_cnt);
+
        if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
-               head_ptr = substream->runtime->status->hw_ptr;
-               tail_ptr = tmp_ptr;
+               head_cnt = host_cnt;
+               tail_cnt = dai_cnt;
        } else {
-               head_ptr = tmp_ptr;
-               tail_ptr = substream->runtime->status->hw_ptr;
+               head_cnt = dai_cnt;
+               tail_cnt = host_cnt;
+       }
+
+       if (head_cnt < tail_cnt) {
+               time_info->delay = time_info->boundary - tail_cnt + head_cnt;
+               goto out;
        }
 
-       if (head_ptr < tail_ptr)
-               return substream->runtime->boundary - tail_ptr + head_ptr;
+       time_info->delay =  head_cnt - tail_cnt;
+
+out:
+       /*
+        * Convert the host byte counter to PCM pointer which wraps in buffer
+        * and it is in frames
+        */
+       div64_u64_rem(host_ptr, snd_pcm_lib_buffer_bytes(substream), &host_ptr);
+       *pointer = bytes_to_frames(substream->runtime, host_ptr);
+
+       return 0;
+}
+
+static snd_pcm_sframes_t sof_ipc4_pcm_delay(struct snd_soc_component *component,
+                                           struct snd_pcm_substream *substream)
+{
+       struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
+       struct sof_ipc4_timestamp_info *time_info;
+       struct snd_sof_pcm_stream *stream;
+       struct snd_sof_pcm *spcm;
+
+       spcm = snd_sof_find_spcm_dai(component, rtd);
+       if (!spcm)
+               return 0;
+
+       stream = &spcm->stream[substream->stream];
+       time_info = stream->private;
+       /*
+        * Report the stored delay value calculated in the pointer callback.
+        * In the unlikely event that the calculation was skipped/aborted, the
+        * default 0 delay returned.
+        */
+       if (time_info)
+               return time_info->delay;
+
+       /* No delay information available, report 0 as delay */
+       return 0;
 
-       return head_ptr - tail_ptr;
 }
 
 const struct sof_ipc_pcm_ops ipc4_pcm_ops = {
@@ -941,6 +1059,7 @@ const struct sof_ipc_pcm_ops ipc4_pcm_ops = {
        .dai_link_fixup = sof_ipc4_pcm_dai_link_fixup,
        .pcm_setup = sof_ipc4_pcm_setup,
        .pcm_free = sof_ipc4_pcm_free,
+       .pointer = sof_ipc4_pcm_pointer,
        .delay = sof_ipc4_pcm_delay,
        .ipc_first_on_start = true,
        .platform_stop_during_hw_free = true,
index f3b908b093f9562ddeb6932b4f1743a27c2b3c09..afed618a15f061a8588466490ee38ea19a80bc3d 100644 (file)
@@ -92,20 +92,6 @@ struct sof_ipc4_fw_data {
        struct mutex pipeline_state_mutex; /* protect pipeline triggers, ref counts and states */
 };
 
-/**
- * struct sof_ipc4_timestamp_info - IPC4 timestamp info
- * @host_copier: the host copier of the pcm stream
- * @dai_copier: the dai copier of the pcm stream
- * @stream_start_offset: reported by fw in memory window
- * @llp_offset: llp offset in memory window
- */
-struct sof_ipc4_timestamp_info {
-       struct sof_ipc4_copier *host_copier;
-       struct sof_ipc4_copier *dai_copier;
-       u64 stream_start_offset;
-       u32 llp_offset;
-};
-
 extern const struct sof_ipc_fw_loader_ops ipc4_loader_ops;
 extern const struct sof_ipc_tplg_ops ipc4_tplg_ops;
 extern const struct sof_ipc_tplg_control_ops tplg_ipc4_control_ops;
index f28edd9830c1b3e25961add70dff87a489cfa119..5cca058421260978dd18e992b09dfff58b44bbdb 100644 (file)
@@ -412,8 +412,9 @@ static int sof_ipc4_widget_setup_pcm(struct snd_sof_widget *swidget)
        struct sof_ipc4_available_audio_format *available_fmt;
        struct snd_soc_component *scomp = swidget->scomp;
        struct sof_ipc4_copier *ipc4_copier;
+       struct snd_sof_pcm *spcm;
        int node_type = 0;
-       int ret;
+       int ret, dir;
 
        ipc4_copier = kzalloc(sizeof(*ipc4_copier), GFP_KERNEL);
        if (!ipc4_copier)
@@ -447,6 +448,25 @@ static int sof_ipc4_widget_setup_pcm(struct snd_sof_widget *swidget)
        }
        dev_dbg(scomp->dev, "host copier '%s' node_type %u\n", swidget->widget->name, node_type);
 
+       spcm = snd_sof_find_spcm_comp(scomp, swidget->comp_id, &dir);
+       if (!spcm)
+               goto skip_gtw_cfg;
+
+       if (dir == SNDRV_PCM_STREAM_PLAYBACK) {
+               struct snd_sof_pcm_stream *sps = &spcm->stream[dir];
+
+               sof_update_ipc_object(scomp, &sps->dsp_max_burst_size_in_ms,
+                                     SOF_COPIER_DEEP_BUFFER_TOKENS,
+                                     swidget->tuples,
+                                     swidget->num_tuples, sizeof(u32), 1);
+               /* Set default DMA buffer size if it is not specified in topology */
+               if (!sps->dsp_max_burst_size_in_ms)
+                       sps->dsp_max_burst_size_in_ms = SOF_IPC4_MIN_DMA_BUFFER_SIZE;
+       } else {
+               /* Capture data is copied from DSP to host in 1ms bursts */
+               spcm->stream[dir].dsp_max_burst_size_in_ms = 1;
+       }
+
 skip_gtw_cfg:
        ipc4_copier->gtw_attr = kzalloc(sizeof(*ipc4_copier->gtw_attr), GFP_KERNEL);
        if (!ipc4_copier->gtw_attr) {
index 6cf21e829e07272ccf4c7005f74f9ae61403d39b..3cd748e13460916517d9533c48d2172d556fc344 100644 (file)
@@ -523,12 +523,26 @@ static inline int snd_sof_pcm_platform_ack(struct snd_sof_dev *sdev,
        return 0;
 }
 
-static inline u64 snd_sof_pcm_get_stream_position(struct snd_sof_dev *sdev,
-                                                 struct snd_soc_component *component,
-                                                 struct snd_pcm_substream *substream)
+static inline u64
+snd_sof_pcm_get_dai_frame_counter(struct snd_sof_dev *sdev,
+                                 struct snd_soc_component *component,
+                                 struct snd_pcm_substream *substream)
 {
-       if (sof_ops(sdev) && sof_ops(sdev)->get_stream_position)
-               return sof_ops(sdev)->get_stream_position(sdev, component, substream);
+       if (sof_ops(sdev) && sof_ops(sdev)->get_dai_frame_counter)
+               return sof_ops(sdev)->get_dai_frame_counter(sdev, component,
+                                                           substream);
+
+       return 0;
+}
+
+static inline u64
+snd_sof_pcm_get_host_byte_counter(struct snd_sof_dev *sdev,
+                                 struct snd_soc_component *component,
+                                 struct snd_pcm_substream *substream)
+{
+       if (sof_ops(sdev) && sof_ops(sdev)->get_host_byte_counter)
+               return sof_ops(sdev)->get_host_byte_counter(sdev, component,
+                                                           substream);
 
        return 0;
 }
index 33d576b1764783ab3468591703e0c97106893e9e..f03cee94bce62642e3c419d4f956a2011ea4dd3f 100644 (file)
@@ -388,13 +388,21 @@ static snd_pcm_uframes_t sof_pcm_pointer(struct snd_soc_component *component,
 {
        struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
        struct snd_sof_dev *sdev = snd_soc_component_get_drvdata(component);
+       const struct sof_ipc_pcm_ops *pcm_ops = sof_ipc_get_ops(sdev, pcm);
        struct snd_sof_pcm *spcm;
        snd_pcm_uframes_t host, dai;
+       int ret = -EOPNOTSUPP;
 
        /* nothing to do for BE */
        if (rtd->dai_link->no_pcm)
                return 0;
 
+       if (pcm_ops && pcm_ops->pointer)
+               ret = pcm_ops->pointer(component, substream, &host);
+
+       if (ret != -EOPNOTSUPP)
+               return ret ? ret : host;
+
        /* use dsp ops pointer callback directly if set */
        if (sof_ops(sdev)->pcm_pointer)
                return sof_ops(sdev)->pcm_pointer(sdev, substream);
index 9ea2ac5adac79ee322f82060b908ce529cd9c43b..86bbb531e142c72be1ca5d710c466d16c9058734 100644 (file)
@@ -103,7 +103,10 @@ struct snd_sof_dai_config_data {
  *            additional memory in the SOF PCM stream structure
  * @pcm_free: Function pointer for PCM free that can be used for freeing any
  *            additional memory in the SOF PCM stream structure
- * @delay: Function pointer for pcm delay calculation
+ * @pointer: Function pointer for pcm pointer
+ *          Note: the @pointer callback may return -EOPNOTSUPP which should be
+ *                handled in a same way as if the callback is not provided
+ * @delay: Function pointer for pcm delay reporting
  * @reset_hw_params_during_stop: Flag indicating whether the hw_params should be reset during the
  *                              STOP pcm trigger
  * @ipc_first_on_start: Send IPC before invoking platform trigger during
@@ -124,6 +127,9 @@ struct sof_ipc_pcm_ops {
        int (*dai_link_fixup)(struct snd_soc_pcm_runtime *rtd, struct snd_pcm_hw_params *params);
        int (*pcm_setup)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm);
        void (*pcm_free)(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm);
+       int (*pointer)(struct snd_soc_component *component,
+                      struct snd_pcm_substream *substream,
+                      snd_pcm_uframes_t *pointer);
        snd_pcm_sframes_t (*delay)(struct snd_soc_component *component,
                                   struct snd_pcm_substream *substream);
        bool reset_hw_params_during_stop;
@@ -322,6 +328,7 @@ struct snd_sof_pcm_stream {
        struct work_struct period_elapsed_work;
        struct snd_soc_dapm_widget_list *list; /* list of connected DAPM widgets */
        bool d0i3_compatible; /* DSP can be in D0I3 when this pcm is opened */
+       unsigned int dsp_max_burst_size_in_ms; /* The maximum size of the host DMA burst in ms */
        /*
         * flag to indicate that the DSP pipelines should be kept
         * active or not while suspending the stream
index d453a4ce3b219d601813310c22cbf11029a08a77..d3c436f826046bca9f385b429d6d7e1639600f63 100644 (file)
@@ -262,13 +262,25 @@ struct snd_sof_dsp_ops {
        int (*pcm_ack)(struct snd_sof_dev *sdev, struct snd_pcm_substream *substream); /* optional */
 
        /*
-        * optional callback to retrieve the link DMA position for the substream
-        * when the position is not reported in the shared SRAM windows but
-        * instead from a host-accessible hardware counter.
+        * optional callback to retrieve the number of frames left/arrived from/to
+        * the DSP on the DAI side (link/codec/DMIC/etc).
+        *
+        * The callback is used when the firmware does not provide this information
+        * via the shared SRAM window and it can be retrieved by host.
         */
-       u64 (*get_stream_position)(struct snd_sof_dev *sdev,
-                                  struct snd_soc_component *component,
-                                  struct snd_pcm_substream *substream); /* optional */
+       u64 (*get_dai_frame_counter)(struct snd_sof_dev *sdev,
+                                    struct snd_soc_component *component,
+                                    struct snd_pcm_substream *substream); /* optional */
+
+       /*
+        * Optional callback to retrieve the number of bytes left/arrived from/to
+        * the DSP on the host side (bytes between host ALSA buffer and DSP).
+        *
+        * The callback is needed for ALSA delay reporting.
+        */
+       u64 (*get_host_byte_counter)(struct snd_sof_dev *sdev,
+                                    struct snd_soc_component *component,
+                                    struct snd_pcm_substream *substream); /* optional */
 
        /* host read DSP stream data */
        int (*ipc_msg_data)(struct snd_sof_dev *sdev,
index b67617b68e509d2c86d78058f7796a64aab00f41..f4437015d43a7500b809a303f175b211662d500f 100644 (file)
@@ -202,7 +202,7 @@ int line6_send_raw_message_async(struct usb_line6 *line6, const char *buffer,
        struct urb *urb;
 
        /* create message: */
-       msg = kmalloc(sizeof(struct message), GFP_ATOMIC);
+       msg = kzalloc(sizeof(struct message), GFP_ATOMIC);
        if (msg == NULL)
                return -ENOMEM;
 
@@ -688,7 +688,7 @@ static int line6_init_cap_control(struct usb_line6 *line6)
        int ret;
 
        /* initialize USB buffers: */
-       line6->buffer_listen = kmalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL);
+       line6->buffer_listen = kzalloc(LINE6_BUFSIZE_LISTEN, GFP_KERNEL);
        if (!line6->buffer_listen)
                return -ENOMEM;
 
@@ -697,7 +697,7 @@ static int line6_init_cap_control(struct usb_line6 *line6)
                return -ENOMEM;
 
        if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) {
-               line6->buffer_message = kmalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
+               line6->buffer_message = kzalloc(LINE6_MIDI_MESSAGE_MAXLEN, GFP_KERNEL);
                if (!line6->buffer_message)
                        return -ENOMEM;
 
index 72535f00572f6eca7f1d0df4511cf1dfd239b84b..72ea363d434db09c06c6acc9ed1855dec7188504 100644 (file)
@@ -3,6 +3,8 @@
 #ifndef _LINUX_BTF_IDS_H
 #define _LINUX_BTF_IDS_H
 
+#include <linux/types.h> /* for u32 */
+
 struct btf_id_set {
        u32 cnt;
        u32 ids[];
index ddba2c2fb5deb1b2ec2ab02db352bada7e70feee..4eaba83cdcf3d20f522e4d9a5fec5a65117ccaaf 100644 (file)
@@ -135,8 +135,8 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
 
                irq_iter = READ_ONCE(shared_data->nr_iter);
                __GUEST_ASSERT(config_iter + 1 == irq_iter,
-                               "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n"
-                               "  Guest timer interrupt was not trigged within the specified\n"
+                               "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+                               "  Guest timer interrupt was not triggered within the specified\n"
                                "  interval, try to increase the error margin by [-e] option.\n",
                                config_iter + 1, irq_iter);
        }
index 3bd03b088dda605348c7f85fc8d190ef63cf9e5e..81ce37ec407dd18b04abab3ff0a40c8deca9cd65 100644 (file)
@@ -1037,8 +1037,19 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
 void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
                             struct kvm_x86_cpu_property property,
                             uint32_t value);
+void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);
 
 void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+
+static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
+                                 struct kvm_x86_cpu_feature feature)
+{
+       struct kvm_cpuid_entry2 *entry;
+
+       entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+       return *((&entry->eax) + feature.reg) & BIT(feature.bit);
+}
+
 void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
                                     struct kvm_x86_cpu_feature feature,
                                     bool set);
index e22848f747c0152f9d4acd0d0a3a8636c471a587..0f9cabd99fd451290410f75783d65c6dcf79e55c 100644 (file)
@@ -60,7 +60,7 @@ static void guest_run(struct test_vcpu_shared_data *shared_data)
                irq_iter = READ_ONCE(shared_data->nr_iter);
                __GUEST_ASSERT(config_iter + 1 == irq_iter,
                                "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
-                               "  Guest timer interrupt was not trigged within the specified\n"
+                               "  Guest timer interrupt was not triggered within the specified\n"
                                "  interval, try to increase the error margin by [-e] option.\n",
                                config_iter + 1, irq_iter);
        }
index 9e2879af7c201fb749120cee94a61e03dfaf162b..40cc59f4e6501316695485a10532479787899d81 100644 (file)
@@ -133,6 +133,43 @@ static void enter_guest(struct kvm_vcpu *vcpu)
        }
 }
 
+static void test_pv_unhalt(void)
+{
+       struct kvm_vcpu *vcpu;
+       struct kvm_vm *vm;
+       struct kvm_cpuid_entry2 *ent;
+       u32 kvm_sig_old;
+
+       pr_info("testing KVM_FEATURE_PV_UNHALT\n");
+
+       TEST_REQUIRE(KVM_CAP_X86_DISABLE_EXITS);
+
+       /* KVM_PV_UNHALT test */
+       vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+       vcpu_set_cpuid_feature(vcpu, X86_FEATURE_KVM_PV_UNHALT);
+
+       TEST_ASSERT(vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+                   "Enabling X86_FEATURE_KVM_PV_UNHALT had no effect");
+
+       /* Make sure KVM clears vcpu->arch.kvm_cpuid */
+       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+       kvm_sig_old = ent->ebx;
+       ent->ebx = 0xdeadbeef;
+       vcpu_set_cpuid(vcpu);
+
+       vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, KVM_X86_DISABLE_EXITS_HLT);
+       ent = vcpu_get_cpuid_entry(vcpu, KVM_CPUID_SIGNATURE);
+       ent->ebx = kvm_sig_old;
+       vcpu_set_cpuid(vcpu);
+
+       TEST_ASSERT(!vcpu_cpuid_has(vcpu, X86_FEATURE_KVM_PV_UNHALT),
+                   "KVM_FEATURE_PV_UNHALT is set with KVM_CAP_X86_DISABLE_EXITS");
+
+       /* FIXME: actually test KVM_FEATURE_PV_UNHALT feature */
+
+       kvm_vm_free(vm);
+}
+
 int main(void)
 {
        struct kvm_vcpu *vcpu;
@@ -151,4 +188,6 @@ int main(void)
 
        enter_guest(vcpu);
        kvm_vm_free(vm);
+
+       test_pv_unhalt();
 }
index a2662348cdb1a20753a043c44c7faae2b4603871..b7b54d646b937cc2ac0071ff661919ae21920459 100644 (file)
@@ -6,7 +6,9 @@
 
 #include "../kselftest_harness.h"
 
-struct in6_addr in6addr_v4mapped_any = {
+static const __u32 in4addr_any = INADDR_ANY;
+static const __u32 in4addr_loopback = INADDR_LOOPBACK;
+static const struct in6_addr in6addr_v4mapped_any = {
        .s6_addr = {
                0, 0, 0, 0,
                0, 0, 0, 0,
@@ -14,8 +16,7 @@ struct in6_addr in6addr_v4mapped_any = {
                0, 0, 0, 0
        }
 };
-
-struct in6_addr in6addr_v4mapped_loopback = {
+static const struct in6_addr in6addr_v4mapped_loopback = {
        .s6_addr = {
                0, 0, 0, 0,
                0, 0, 0, 0,
@@ -24,137 +25,785 @@ struct in6_addr in6addr_v4mapped_loopback = {
        }
 };
 
+#define NR_SOCKETS 8
+
 FIXTURE(bind_wildcard)
 {
-       struct sockaddr_in addr4;
-       struct sockaddr_in6 addr6;
+       int fd[NR_SOCKETS];
+       socklen_t addrlen[NR_SOCKETS];
+       union {
+               struct sockaddr addr;
+               struct sockaddr_in addr4;
+               struct sockaddr_in6 addr6;
+       } addr[NR_SOCKETS];
 };
 
 FIXTURE_VARIANT(bind_wildcard)
 {
-       const __u32 addr4_const;
-       const struct in6_addr *addr6_const;
-       int expected_errno;
+       sa_family_t family[2];
+       const void *addr[2];
+       bool ipv6_only[2];
+
+       /* 6 bind() calls below follow two bind() for the defined 2 addresses:
+        *
+        *   0.0.0.0
+        *   127.0.0.1
+        *   ::
+        *   ::1
+        *   ::ffff:0.0.0.0
+        *   ::ffff:127.0.0.1
+        */
+       int expected_errno[NR_SOCKETS];
+       int expected_reuse_errno[NR_SOCKETS];
+};
+
+/* (IPv4, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v4_local)
+{
+       .family = {AF_INET, AF_INET},
+       .addr = {&in4addr_any, &in4addr_loopback},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v4_any)
+{
+       .family = {AF_INET, AF_INET},
+       .addr = {&in4addr_loopback, &in4addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
 };
 
+/* (IPv4, IPv6) */
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
 {
-       .addr4_const = INADDR_ANY,
-       .addr6_const = &in6addr_any,
-       .expected_errno = EADDRINUSE,
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_any, &in6addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any_only)
+{
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_any, &in6addr_any},
+       .ipv6_only = {false, true},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
 {
-       .addr4_const = INADDR_ANY,
-       .addr6_const = &in6addr_loopback,
-       .expected_errno = 0,
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_any, &in6addr_loopback},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any)
 {
-       .addr4_const = INADDR_ANY,
-       .addr6_const = &in6addr_v4mapped_any,
-       .expected_errno = EADDRINUSE,
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_any, &in6addr_v4mapped_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local)
 {
-       .addr4_const = INADDR_ANY,
-       .addr6_const = &in6addr_v4mapped_loopback,
-       .expected_errno = EADDRINUSE,
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_any, &in6addr_v4mapped_loopback},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
 {
-       .addr4_const = INADDR_LOOPBACK,
-       .addr6_const = &in6addr_any,
-       .expected_errno = EADDRINUSE,
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_loopback, &in6addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any_only)
+{
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_loopback, &in6addr_any},
+       .ipv6_only = {false, true},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
 {
-       .addr4_const = INADDR_LOOPBACK,
-       .addr6_const = &in6addr_loopback,
-       .expected_errno = 0,
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_loopback, &in6addr_loopback},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any)
 {
-       .addr4_const = INADDR_LOOPBACK,
-       .addr6_const = &in6addr_v4mapped_any,
-       .expected_errno = EADDRINUSE,
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_loopback, &in6addr_v4mapped_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
 };
 
 FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local)
 {
-       .addr4_const = INADDR_LOOPBACK,
-       .addr6_const = &in6addr_v4mapped_loopback,
-       .expected_errno = EADDRINUSE,
+       .family = {AF_INET, AF_INET6},
+       .addr = {&in4addr_loopback, &in6addr_v4mapped_loopback},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_any)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_any, &in4addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
 };
 
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_any)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_any, &in4addr_any},
+       .ipv6_only = {true, false},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_local)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_any, &in4addr_loopback},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_local)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_any, &in4addr_loopback},
+       .ipv6_only = {true, false},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_any)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_loopback, &in4addr_any},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_local)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_loopback, &in4addr_loopback},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_any)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_v4mapped_any, &in4addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_local)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_v4mapped_any, &in4addr_loopback},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_any)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_v4mapped_loopback, &in4addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_local)
+{
+       .family = {AF_INET6, AF_INET},
+       .addr = {&in6addr_v4mapped_loopback, &in4addr_loopback},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv6) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_any},
+       .ipv6_only = {true, false},
+       .expected_errno = {0, EADDRINUSE,
+                          0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any_only)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_any},
+       .ipv6_only = {false, true},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any_only)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_any},
+       .ipv6_only = {true, true},
+       .expected_errno = {0, EADDRINUSE,
+                          0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                0, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_local)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_loopback},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_local)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_loopback},
+       .ipv6_only = {true, false},
+       .expected_errno = {0, EADDRINUSE,
+                          0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                0, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_any)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_v4mapped_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_any)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_v4mapped_any},
+       .ipv6_only = {true, false},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_local)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_local)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+       .ipv6_only = {true, false},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_loopback, &in6addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any_only)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_loopback, &in6addr_any},
+       .ipv6_only = {false, true},
+       .expected_errno = {0, EADDRINUSE,
+                          0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                0, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_any)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_loopback, &in6addr_v4mapped_any},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_local)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_loopback, &in6addr_v4mapped_loopback},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_v4mapped_any, &in6addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any_only)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_v4mapped_any, &in6addr_any},
+       .ipv6_only = {false, true},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_local)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_v4mapped_any, &in6addr_loopback},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_v4mapped_local)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_v4mapped_any, &in6addr_v4mapped_loopback},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any_only)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+       .ipv6_only = {false, true},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_local)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_v4mapped_loopback, &in6addr_loopback},
+       .expected_errno = {0, 0,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_v4mapped_any)
+{
+       .family = {AF_INET6, AF_INET6},
+       .addr = {&in6addr_v4mapped_loopback, &in6addr_v4mapped_any},
+       .expected_errno = {0, EADDRINUSE,
+                          EADDRINUSE, EADDRINUSE,
+                          EADDRINUSE, 0,
+                          EADDRINUSE, EADDRINUSE},
+       .expected_reuse_errno = {0, 0,
+                                EADDRINUSE, EADDRINUSE,
+                                EADDRINUSE, 0,
+                                EADDRINUSE, EADDRINUSE},
+};
+
+static void setup_addr(FIXTURE_DATA(bind_wildcard) *self, int i,
+                      int family, const void *addr_const)
+{
+       if (family == AF_INET) {
+               struct sockaddr_in *addr4 = &self->addr[i].addr4;
+               const __u32 *addr4_const = addr_const;
+
+               addr4->sin_family = AF_INET;
+               addr4->sin_port = htons(0);
+               addr4->sin_addr.s_addr = htonl(*addr4_const);
+
+               self->addrlen[i] = sizeof(struct sockaddr_in);
+       } else {
+               struct sockaddr_in6 *addr6 = &self->addr[i].addr6;
+               const struct in6_addr *addr6_const = addr_const;
+
+               addr6->sin6_family = AF_INET6;
+               addr6->sin6_port = htons(0);
+               addr6->sin6_addr = *addr6_const;
+
+               self->addrlen[i] = sizeof(struct sockaddr_in6);
+       }
+}
+
 FIXTURE_SETUP(bind_wildcard)
 {
-       self->addr4.sin_family = AF_INET;
-       self->addr4.sin_port = htons(0);
-       self->addr4.sin_addr.s_addr = htonl(variant->addr4_const);
+       setup_addr(self, 0, variant->family[0], variant->addr[0]);
+       setup_addr(self, 1, variant->family[1], variant->addr[1]);
+
+       setup_addr(self, 2, AF_INET, &in4addr_any);
+       setup_addr(self, 3, AF_INET, &in4addr_loopback);
 
-       self->addr6.sin6_family = AF_INET6;
-       self->addr6.sin6_port = htons(0);
-       self->addr6.sin6_addr = *variant->addr6_const;
+       setup_addr(self, 4, AF_INET6, &in6addr_any);
+       setup_addr(self, 5, AF_INET6, &in6addr_loopback);
+       setup_addr(self, 6, AF_INET6, &in6addr_v4mapped_any);
+       setup_addr(self, 7, AF_INET6, &in6addr_v4mapped_loopback);
 }
 
 FIXTURE_TEARDOWN(bind_wildcard)
 {
+       int i;
+
+       for (i = 0; i < NR_SOCKETS; i++)
+               close(self->fd[i]);
 }
 
-void bind_sockets(struct __test_metadata *_metadata,
-                 FIXTURE_DATA(bind_wildcard) *self,
-                 int expected_errno,
-                 struct sockaddr *addr1, socklen_t addrlen1,
-                 struct sockaddr *addr2, socklen_t addrlen2)
+void bind_socket(struct __test_metadata *_metadata,
+                FIXTURE_DATA(bind_wildcard) *self,
+                const FIXTURE_VARIANT(bind_wildcard) *variant,
+                int i, int reuse)
 {
-       int fd[2];
        int ret;
 
-       fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0);
-       ASSERT_GT(fd[0], 0);
+       self->fd[i] = socket(self->addr[i].addr.sa_family, SOCK_STREAM, 0);
+       ASSERT_GT(self->fd[i], 0);
 
-       ret = bind(fd[0], addr1, addrlen1);
-       ASSERT_EQ(ret, 0);
+       if (i < 2 && variant->ipv6_only[i]) {
+               ret = setsockopt(self->fd[i], SOL_IPV6, IPV6_V6ONLY, &(int){1}, sizeof(int));
+               ASSERT_EQ(ret, 0);
+       }
 
-       ret = getsockname(fd[0], addr1, &addrlen1);
-       ASSERT_EQ(ret, 0);
+       if (i < 2 && reuse) {
+               ret = setsockopt(self->fd[i], SOL_SOCKET, reuse, &(int){1}, sizeof(int));
+               ASSERT_EQ(ret, 0);
+       }
 
-       ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port;
+       self->addr[i].addr4.sin_port = self->addr[0].addr4.sin_port;
 
-       fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0);
-       ASSERT_GT(fd[1], 0);
+       ret = bind(self->fd[i], &self->addr[i].addr, self->addrlen[i]);
 
-       ret = bind(fd[1], addr2, addrlen2);
-       if (expected_errno) {
-               ASSERT_EQ(ret, -1);
-               ASSERT_EQ(errno, expected_errno);
+       if (reuse) {
+               if (variant->expected_reuse_errno[i]) {
+                       ASSERT_EQ(ret, -1);
+                       ASSERT_EQ(errno, variant->expected_reuse_errno[i]);
+               } else {
+                       ASSERT_EQ(ret, 0);
+               }
        } else {
+               if (variant->expected_errno[i]) {
+                       ASSERT_EQ(ret, -1);
+                       ASSERT_EQ(errno, variant->expected_errno[i]);
+               } else {
+                       ASSERT_EQ(ret, 0);
+               }
+       }
+
+       if (i == 0) {
+               ret = getsockname(self->fd[0], &self->addr[0].addr, &self->addrlen[0]);
                ASSERT_EQ(ret, 0);
        }
+}
 
-       close(fd[1]);
-       close(fd[0]);
+TEST_F(bind_wildcard, plain)
+{
+       int i;
+
+       for (i = 0; i < NR_SOCKETS; i++)
+               bind_socket(_metadata, self, variant, i, 0);
 }
 
-TEST_F(bind_wildcard, v4_v6)
+TEST_F(bind_wildcard, reuseaddr)
 {
-       bind_sockets(_metadata, self, variant->expected_errno,
-                    (struct sockaddr *)&self->addr4, sizeof(self->addr4),
-                    (struct sockaddr *)&self->addr6, sizeof(self->addr6));
+       int i;
+
+       for (i = 0; i < NR_SOCKETS; i++)
+               bind_socket(_metadata, self, variant, i, SO_REUSEADDR);
 }
 
-TEST_F(bind_wildcard, v6_v4)
+TEST_F(bind_wildcard, reuseport)
 {
-       bind_sockets(_metadata, self, variant->expected_errno,
-                    (struct sockaddr *)&self->addr6, sizeof(self->addr6),
-                    (struct sockaddr *)&self->addr4, sizeof(self->addr4));
+       int i;
+
+       for (i = 0; i < NR_SOCKETS; i++)
+               bind_socket(_metadata, self, variant, i, SO_REUSEPORT);
 }
 
 TEST_HARNESS_MAIN
index 4c424855482629d778bd67c719a82b791f15b32c..4131f3263a4826b808fca43ab92be4b893bb8522 100755 (executable)
@@ -383,12 +383,14 @@ do_transfer()
        local stat_cookierx_last
        local stat_csum_err_s
        local stat_csum_err_c
+       local stat_tcpfb_last_l
        stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
        stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
        stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
        stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
        stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
        stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+       stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
 
        timeout ${timeout_test} \
                ip netns exec ${listener_ns} \
@@ -457,11 +459,13 @@ do_transfer()
        local stat_cookietx_now
        local stat_cookierx_now
        local stat_ooo_now
+       local stat_tcpfb_now_l
        stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
        stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
        stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
        stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
        stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+       stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
 
        expect_synrx=$((stat_synrx_last_l))
        expect_ackrx=$((stat_ackrx_last_l))
@@ -508,6 +512,11 @@ do_transfer()
                fi
        fi
 
+       if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+               mptcp_lib_pr_fail "unexpected fallback to TCP"
+               rets=1
+       fi
+
        if [ $cookies -eq 2 ];then
                if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
                        extra+=" WARN: CookieSent: did not advance"
index 5e9211e89825685526e122f2eefedb71f4c1860f..e4403236f655488e298c86d51fef70a07d602384 100755 (executable)
@@ -729,7 +729,7 @@ pm_nl_check_endpoint()
                        [ -n "$_flags" ]; flags="flags $_flags"
                        shift
                elif [ $1 = "dev" ]; then
-                       [ -n "$2" ]; dev="dev $1"
+                       [ -n "$2" ]; dev="dev $2"
                        shift
                elif [ $1 = "id" ]; then
                        _id=$2
@@ -3610,6 +3610,8 @@ endpoint_tests()
                local tests_pid=$!
 
                wait_mpj $ns2
+               pm_nl_check_endpoint "creation" \
+                       $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
                chk_subflow_nr "before delete" 2
                chk_mptcp_info subflows 1 subflows 1
 
index 7c5b12664b03b04b62b5060fd6e447e4ae67b89d..bfb07dc495186d86866ce395f49a78190b43353d 100644 (file)
@@ -109,6 +109,6 @@ int main(void)
        fd1 = open_port(0, 1);
        if (fd1 >= 0)
                error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
-       fprintf(stderr, "Success");
+       fprintf(stderr, "Success\n");
        return 0;
 }
index 380cb15e942e42f7ef5cc652c0131c4a592d44f2..83ed987cff340eb018f5296dd4f983ebac4a6245 100755 (executable)
@@ -244,7 +244,7 @@ for family in 4 6; do
        create_vxlan_pair
        ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
        ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
-       run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+       run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10
        cleanup
 
        # use NAT to circumvent GRO FWD check
@@ -258,13 +258,7 @@ for family in 4 6; do
        # load arp cache before running the test to reduce the amount of
        # stray traffic on top of the UDP tunnel
        ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
-       run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
-       cleanup
-
-       create_vxlan_pair
-       run_bench "UDP tunnel fwd perf" $OL_NET$DST
-       ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
-       run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+       run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
        cleanup
 done