Merge tag 'loongarch-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Sep 2023 19:16:52 +0000 (12:16 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 8 Sep 2023 19:16:52 +0000 (12:16 -0700)
Pull LoongArch updates from Huacai Chen:

 - Allow usage of LSX/LASX in the kernel, and use them for
   SIMD-optimized RAID5/RAID6 routines

 - Add Loongson Binary Translation (LBT) extension support

 - Add basic KGDB & KDB support

 - Allow building with kcov coverage

 - Add KFENCE (Kernel Electric-Fence) support

 - Add KASAN (Kernel Address Sanitizer) support

 - Some bug fixes and other small changes

 - Update the default config file

* tag 'loongarch-6.6' of git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson: (25 commits)
  LoongArch: Update Loongson-3 default config file
  LoongArch: Add KASAN (Kernel Address Sanitizer) support
  LoongArch: Simplify the processing of jumping new kernel for KASLR
  kasan: Add (pmd|pud)_init for LoongArch zero_(pud|p4d)_populate process
  kasan: Add __HAVE_ARCH_SHADOW_MAP to support arch specific mapping
  LoongArch: Add KFENCE (Kernel Electric-Fence) support
  LoongArch: Get partial stack information when providing regs parameter
  LoongArch: mm: Add page table mapped mode support for virt_to_page()
  kfence: Defer the assignment of the local variable addr
  LoongArch: Allow building with kcov coverage
  LoongArch: Provide kaslr_offset() to get kernel offset
  LoongArch: Add basic KGDB & KDB support
  LoongArch: Add Loongson Binary Translation (LBT) extension support
  raid6: Add LoongArch SIMD recovery implementation
  raid6: Add LoongArch SIMD syndrome calculation
  LoongArch: Add SIMD-optimized XOR routines
  LoongArch: Allow usage of LSX/LASX in the kernel
  LoongArch: Define symbol 'fault' as a local label in fpu.S
  LoongArch: Adjust {copy, clear}_user exception handler behavior
  LoongArch: Use static defined zero page rather than allocated
  ...

75 files changed:
Documentation/dev-tools/kasan.rst
Documentation/features/debug/KASAN/arch-support.txt
Documentation/features/debug/kcov/arch-support.txt
Documentation/features/debug/kgdb/arch-support.txt
Documentation/translations/zh_CN/dev-tools/kasan.rst
arch/loongarch/Kconfig
arch/loongarch/Makefile
arch/loongarch/configs/loongson3_defconfig
arch/loongarch/include/asm/asm-prototypes.h
arch/loongarch/include/asm/asmmacro.h
arch/loongarch/include/asm/kasan.h [new file with mode: 0644]
arch/loongarch/include/asm/kfence.h [new file with mode: 0644]
arch/loongarch/include/asm/kgdb.h [new file with mode: 0644]
arch/loongarch/include/asm/lbt.h [new file with mode: 0644]
arch/loongarch/include/asm/loongarch.h
arch/loongarch/include/asm/mmzone.h
arch/loongarch/include/asm/page.h
arch/loongarch/include/asm/pgalloc.h
arch/loongarch/include/asm/pgtable.h
arch/loongarch/include/asm/processor.h
arch/loongarch/include/asm/setup.h
arch/loongarch/include/asm/stackframe.h
arch/loongarch/include/asm/string.h
arch/loongarch/include/asm/switch_to.h
arch/loongarch/include/asm/thread_info.h
arch/loongarch/include/asm/xor.h [new file with mode: 0644]
arch/loongarch/include/asm/xor_simd.h [new file with mode: 0644]
arch/loongarch/include/uapi/asm/ptrace.h
arch/loongarch/include/uapi/asm/sigcontext.h
arch/loongarch/kernel/Makefile
arch/loongarch/kernel/asm-offsets.c
arch/loongarch/kernel/cpu-probe.c
arch/loongarch/kernel/entry.S
arch/loongarch/kernel/fpu.S
arch/loongarch/kernel/head.S
arch/loongarch/kernel/kfpu.c
arch/loongarch/kernel/kgdb.c [new file with mode: 0644]
arch/loongarch/kernel/lbt.S [new file with mode: 0644]
arch/loongarch/kernel/numa.c
arch/loongarch/kernel/process.c
arch/loongarch/kernel/ptrace.c
arch/loongarch/kernel/relocate.c
arch/loongarch/kernel/setup.c
arch/loongarch/kernel/signal.c
arch/loongarch/kernel/stacktrace.c
arch/loongarch/kernel/traps.c
arch/loongarch/lib/Makefile
arch/loongarch/lib/clear_user.S
arch/loongarch/lib/copy_user.S
arch/loongarch/lib/memcpy.S
arch/loongarch/lib/memmove.S
arch/loongarch/lib/memset.S
arch/loongarch/lib/xor_simd.c [new file with mode: 0644]
arch/loongarch/lib/xor_simd.h [new file with mode: 0644]
arch/loongarch/lib/xor_simd_glue.c [new file with mode: 0644]
arch/loongarch/lib/xor_template.c [new file with mode: 0644]
arch/loongarch/mm/Makefile
arch/loongarch/mm/cache.c
arch/loongarch/mm/fault.c
arch/loongarch/mm/init.c
arch/loongarch/mm/kasan_init.c [new file with mode: 0644]
arch/loongarch/mm/mmap.c
arch/loongarch/mm/pgtable.c
arch/loongarch/vdso/Makefile
include/linux/kasan.h
include/linux/raid/pq.h
lib/raid6/Makefile
lib/raid6/algos.c
lib/raid6/loongarch.h [new file with mode: 0644]
lib/raid6/loongarch_simd.c [new file with mode: 0644]
lib/raid6/recov_loongarch_simd.c [new file with mode: 0644]
lib/raid6/test/Makefile
mm/kasan/init.c
mm/kasan/kasan.h
mm/kfence/core.c

index f4acf9c2e90f6e3c3416546227db1a734f3b34b2..382818a7197aa2bea74345755967783fd5e90b89 100644 (file)
@@ -41,8 +41,8 @@ Support
 Architectures
 ~~~~~~~~~~~~~
 
-Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
-xtensa, and the tag-based KASAN modes are supported only on arm64.
+Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa,
+and loongarch, and the tag-based KASAN modes are supported only on arm64.
 
 Compilers
 ~~~~~~~~~
index bf0124fae643a1bbb13d34a539171354642351b1..c4581c2edb28006def42321db12567f20f4e1b5e 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: | TODO |
index ffcc9f2b1d74d3930302349b3bcf541bb368c6e8..de84cefbcdd36f028e2eb0f1770dde51551a4652 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: | TODO |
     |        mips: |  ok  |
index 958498f9f2a41f2b6c56f2bd3ead11d88b5ef5ae..5e91ec78c80b759cd3af62c696817e7a607e8b93 100644 (file)
@@ -13,7 +13,7 @@
     |        csky: | TODO |
     |     hexagon: |  ok  |
     |        ia64: | TODO |
-    |   loongarch: | TODO |
+    |   loongarch: |  ok  |
     |        m68k: | TODO |
     |  microblaze: |  ok  |
     |        mips: |  ok  |
index 05ef904dbcfb2e154526cb546d22aa65f58b4d90..8fdb20c9665b4b5e8a9afe12fb361f9a1d2315d7 100644 (file)
@@ -42,7 +42,7 @@ KASAN有三种模式:
 体系架构
 ~~~~~~~~
 
-在x86_64、arm、arm64、powerpc、riscv、s390和xtensa上支持通用KASAN,
+在x86_64、arm、arm64、powerpc、riscv、s390、xtensa和loongarch上支持通用KASAN,
 而基于标签的KASAN模式只在arm64上支持。
 
 编译器
index ecf282dee513fedb807f5b456e6773fd1a633b4f..e14396a2ddcbfc6d6130b63dca258343a03f35cb 100644 (file)
@@ -8,11 +8,13 @@ config LOONGARCH
        select ACPI_PPTT if ACPI
        select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
        select ARCH_BINFMT_ELF_STATE
+       select ARCH_DISABLE_KASAN_INLINE
        select ARCH_ENABLE_MEMORY_HOTPLUG
        select ARCH_ENABLE_MEMORY_HOTREMOVE
        select ARCH_HAS_ACPI_TABLE_UPGRADE      if ACPI
        select ARCH_HAS_CPU_FINALIZE_INIT
        select ARCH_HAS_FORTIFY_SOURCE
+       select ARCH_HAS_KCOV
        select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
        select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
        select ARCH_HAS_PTE_SPECIAL
@@ -91,6 +93,9 @@ config LOONGARCH
        select HAVE_ARCH_AUDITSYSCALL
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_JUMP_LABEL_RELATIVE
+       select HAVE_ARCH_KASAN
+       select HAVE_ARCH_KFENCE
+       select HAVE_ARCH_KGDB if PERF_EVENTS
        select HAVE_ARCH_MMAP_RND_BITS if MMU
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
@@ -115,6 +120,7 @@ config LOONGARCH
        select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
+       select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO
        select HAVE_HW_BREAKPOINT if PERF_EVENTS
        select HAVE_IOREMAP_PROT
@@ -254,6 +260,9 @@ config AS_HAS_LSX_EXTENSION
 config AS_HAS_LASX_EXTENSION
        def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
 
+config AS_HAS_LBT_EXTENSION
+       def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0)
+
 menu "Kernel type and options"
 
 source "kernel/Kconfig.hz"
@@ -534,6 +543,18 @@ config CPU_HAS_LASX
 
          If unsure, say Y.
 
+config CPU_HAS_LBT
+       bool "Support for the Loongson Binary Translation Extension"
+       depends on AS_HAS_LBT_EXTENSION
+       help
+         Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0
+         to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop).
+         Enabling this option allows the kernel to allocate and switch registers
+         specific to LBT.
+
+         If you want to use this feature, such as the Loongson Architecture
+         Translator (LAT), say Y.
+
 config CPU_HAS_PREFETCH
        bool
        default y
@@ -638,6 +659,11 @@ config ARCH_MMAP_RND_BITS_MAX
 config ARCH_SUPPORTS_UPROBES
        def_bool y
 
+config KASAN_SHADOW_OFFSET
+       hex
+       default 0x0
+       depends on KASAN
+
 menu "Power management options"
 
 config ARCH_SUSPEND_POSSIBLE
index ef87bab46754e63142761c54f688cd2850b9ebbd..fb0fada43197e4ad520359c8f2a6b6350d48d99d 100644 (file)
@@ -84,7 +84,10 @@ LDFLAGS_vmlinux                      += -static -pie --no-dynamic-linker -z notext
 endif
 
 cflags-y += $(call cc-option, -mno-check-zero-division)
+
+ifndef CONFIG_KASAN
 cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
+endif
 
 load-y         = 0x9000000000200000
 bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y)
index d64849b4cba1634849e89f35b2a623c0610c39e9..a3b52aaa83b33634c5be146bc02d50589bf5f762 100644 (file)
@@ -30,7 +30,6 @@ CONFIG_NAMESPACES=y
 CONFIG_USER_NS=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
-CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
@@ -47,8 +46,12 @@ CONFIG_SMP=y
 CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
+CONFIG_CPU_HAS_FPU=y
+CONFIG_CPU_HAS_LSX=y
+CONFIG_CPU_HAS_LASX=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
+CONFIG_RANDOMIZE_BASE=y
 CONFIG_SUSPEND=y
 CONFIG_HIBERNATION=y
 CONFIG_ACPI=y
@@ -63,6 +66,7 @@ CONFIG_EFI_ZBOOT=y
 CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
 CONFIG_EFI_CAPSULE_LOADER=m
 CONFIG_EFI_TEST=m
+CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
@@ -108,7 +112,12 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=m
 CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
 CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
 CONFIG_INET_ESP=m
 CONFIG_INET_UDP_DIAG=y
 CONFIG_TCP_CONG_ADVANCED=y
@@ -137,7 +146,6 @@ CONFIG_NFT_MASQ=m
 CONFIG_NFT_REDIR=m
 CONFIG_NFT_NAT=m
 CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
 CONFIG_NFT_QUEUE=m
 CONFIG_NFT_QUOTA=m
 CONFIG_NFT_REJECT=m
@@ -208,7 +216,11 @@ CONFIG_IP_VS=m
 CONFIG_IP_VS_IPV6=y
 CONFIG_IP_VS_PROTO_TCP=y
 CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
 CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
 CONFIG_IP_VS_NFCT=y
 CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_DUP_IPV4=m
@@ -227,7 +239,6 @@ CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_TARGET_NETMAP=m
 CONFIG_IP_NF_TARGET_REDIRECT=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
@@ -363,6 +374,8 @@ CONFIG_MTD_CFI_AMDSTD=m
 CONFIG_MTD_CFI_STAA=m
 CONFIG_MTD_RAM=m
 CONFIG_MTD_ROM=m
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_BLOCK=y
 CONFIG_PARPORT=y
 CONFIG_PARPORT_PC=y
 CONFIG_PARPORT_SERIAL=y
@@ -370,6 +383,7 @@ CONFIG_PARPORT_PC_FIFO=y
 CONFIG_ZRAM=m
 CONFIG_ZRAM_DEF_COMP_ZSTD=y
 CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=8192
@@ -516,6 +530,8 @@ CONFIG_STMMAC_ETH=y
 # CONFIG_NET_VENDOR_TEHUTI is not set
 # CONFIG_NET_VENDOR_TI is not set
 # CONFIG_NET_VENDOR_VIA is not set
+CONFIG_NGBE=y
+CONFIG_TXGBE=y
 # CONFIG_NET_VENDOR_WIZNET is not set
 # CONFIG_NET_VENDOR_XILINX is not set
 CONFIG_PPP=m
@@ -602,9 +618,15 @@ CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_I2C_CHARDEV=y
 CONFIG_I2C_PIIX4=y
 CONFIG_I2C_GPIO=y
+CONFIG_I2C_LS2X=y
 CONFIG_SPI=y
+CONFIG_SPI_LOONGSON_PCI=m
+CONFIG_SPI_LOONGSON_PLATFORM=m
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_LOONGSON2=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_LOONGSON=y
+CONFIG_GPIO_LOONGSON_64BIT=y
 CONFIG_POWER_RESET=y
 CONFIG_POWER_RESET_RESTART=y
 CONFIG_POWER_RESET_SYSCON=y
@@ -614,6 +636,7 @@ CONFIG_SENSORS_LM75=m
 CONFIG_SENSORS_LM93=m
 CONFIG_SENSORS_W83795=m
 CONFIG_SENSORS_W83627HF=m
+CONFIG_LOONGSON2_THERMAL=m
 CONFIG_RC_CORE=m
 CONFIG_LIRC=y
 CONFIG_RC_DECODERS=y
@@ -643,6 +666,7 @@ CONFIG_DRM_AMDGPU_USERPTR=y
 CONFIG_DRM_AST=y
 CONFIG_DRM_QXL=m
 CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_DRM_LOONGSON=y
 CONFIG_FB=y
 CONFIG_FB_EFI=y
 CONFIG_FB_RADEON=y
@@ -712,6 +736,7 @@ CONFIG_UCSI_ACPI=m
 CONFIG_INFINIBAND=m
 CONFIG_RTC_CLASS=y
 CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_LOONGSON=y
 CONFIG_DMADEVICES=y
 CONFIG_UIO=m
 CONFIG_UIO_PDRV_GENIRQ=m
@@ -745,7 +770,9 @@ CONFIG_COMEDI_NI_LABPC_PCI=m
 CONFIG_COMEDI_NI_PCIDIO=m
 CONFIG_COMEDI_NI_PCIMIO=m
 CONFIG_STAGING=y
-CONFIG_R8188EU=m
+CONFIG_COMMON_CLK_LOONGSON2=y
+CONFIG_LOONGSON2_GUTS=y
+CONFIG_LOONGSON2_PM=y
 CONFIG_PM_DEVFREQ=y
 CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
 CONFIG_DEVFREQ_GOV_PERFORMANCE=y
@@ -759,10 +786,17 @@ CONFIG_EXT2_FS_SECURITY=y
 CONFIG_EXT3_FS=y
 CONFIG_EXT3_FS_POSIX_ACL=y
 CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
 CONFIG_XFS_FS=y
 CONFIG_XFS_QUOTA=y
 CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
 CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
 CONFIG_QUOTA=y
@@ -771,11 +805,14 @@ CONFIG_QFMT_V1=m
 CONFIG_QFMT_V2=m
 CONFIG_AUTOFS_FS=y
 CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
 CONFIG_OVERLAY_FS=y
 CONFIG_OVERLAY_FS_INDEX=y
 CONFIG_OVERLAY_FS_XINO_AUTO=y
 CONFIG_OVERLAY_FS_METACOPY=y
 CONFIG_FSCACHE=y
+CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
 CONFIG_JOLIET=y
 CONFIG_ZISOFS=y
@@ -784,19 +821,42 @@ CONFIG_MSDOS_FS=m
 CONFIG_VFAT_FS=m
 CONFIG_FAT_DEFAULT_CODEPAGE=936
 CONFIG_FAT_DEFAULT_IOCHARSET="gb2312"
+CONFIG_EXFAT_FS=m
+CONFIG_NTFS3_FS=m
+CONFIG_NTFS3_64BIT_CLUSTER=y
+CONFIG_NTFS3_LZX_XPRESS=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=y
+CONFIG_ORANGEFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_ECRYPT_FS_MESSAGING=y
 CONFIG_HFS_FS=m
 CONFIG_HFSPLUS_FS=m
+CONFIG_UBIFS_FS=m
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
 CONFIG_CRAMFS=m
 CONFIG_SQUASHFS=y
 CONFIG_SQUASHFS_XATTR=y
 CONFIG_SQUASHFS_LZ4=y
 CONFIG_SQUASHFS_LZO=y
 CONFIG_SQUASHFS_XZ=y
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_PSTORE=m
+CONFIG_PSTORE_LZO_COMPRESS=m
+CONFIG_PSTORE_LZ4_COMPRESS=m
+CONFIG_PSTORE_LZ4HC_COMPRESS=m
+CONFIG_PSTORE_842_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_ZIP_LZMA=y
+CONFIG_EROFS_FS_PCPU_KTHREAD=y
 CONFIG_NFS_FS=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
@@ -807,6 +867,10 @@ CONFIG_NFSD=y
 CONFIG_NFSD_V3_ACL=y
 CONFIG_NFSD_V4=y
 CONFIG_NFSD_BLOCKLAYOUT=y
+CONFIG_CEPH_FS=m
+CONFIG_CEPH_FSCACHE=y
+CONFIG_CEPH_FS_POSIX_ACL=y
+CONFIG_CEPH_FS_SECURITY_LABEL=y
 CONFIG_CIFS=m
 # CONFIG_CIFS_DEBUG is not set
 CONFIG_9P_FS=y
@@ -814,6 +878,7 @@ CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_CODEPAGE_936=y
 CONFIG_NLS_ASCII=y
 CONFIG_NLS_UTF8=y
+CONFIG_DLM=m
 CONFIG_KEY_DH_OPERATIONS=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_SELINUX=y
@@ -847,6 +912,7 @@ CONFIG_CRYPTO_USER_API_HASH=m
 CONFIG_CRYPTO_USER_API_SKCIPHER=m
 CONFIG_CRYPTO_USER_API_RNG=m
 CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_CRC32_LOONGARCH=m
 CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_PRINTK_TIME=y
 CONFIG_STRIP_ASM_SYMS=y
index ed06d3997420833c173c4deab5697ff8749dedb8..cf8e1a4e7c19dad55c6adc9a56820b02e0fae977 100644 (file)
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/uaccess.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/mmu_context.h>
 #include <asm/page.h>
 #include <asm/ftrace.h>
index 79e1d53fea89cb998f973ff557d20fb845968128..c9544f358c33991b46b67edec578af1df44939b7 100644 (file)
 #include <asm/fpregdef.h>
 #include <asm/loongarch.h>
 
-       .macro  parse_v var val
-       \var    = \val
-       .endm
-
-       .macro  parse_r var r
-       \var    = -1
-       .ifc    \r, $r0
-       \var    = 0
-       .endif
-       .ifc    \r, $r1
-       \var    = 1
-       .endif
-       .ifc    \r, $r2
-       \var    = 2
-       .endif
-       .ifc    \r, $r3
-       \var    = 3
-       .endif
-       .ifc    \r, $r4
-       \var    = 4
-       .endif
-       .ifc    \r, $r5
-       \var    = 5
-       .endif
-       .ifc    \r, $r6
-       \var    = 6
-       .endif
-       .ifc    \r, $r7
-       \var    = 7
-       .endif
-       .ifc    \r, $r8
-       \var    = 8
-       .endif
-       .ifc    \r, $r9
-       \var    = 9
-       .endif
-       .ifc    \r, $r10
-       \var    = 10
-       .endif
-       .ifc    \r, $r11
-       \var    = 11
-       .endif
-       .ifc    \r, $r12
-       \var    = 12
-       .endif
-       .ifc    \r, $r13
-       \var    = 13
-       .endif
-       .ifc    \r, $r14
-       \var    = 14
-       .endif
-       .ifc    \r, $r15
-       \var    = 15
-       .endif
-       .ifc    \r, $r16
-       \var    = 16
-       .endif
-       .ifc    \r, $r17
-       \var    = 17
-       .endif
-       .ifc    \r, $r18
-       \var    = 18
-       .endif
-       .ifc    \r, $r19
-       \var    = 19
-       .endif
-       .ifc    \r, $r20
-       \var    = 20
-       .endif
-       .ifc    \r, $r21
-       \var    = 21
-       .endif
-       .ifc    \r, $r22
-       \var    = 22
-       .endif
-       .ifc    \r, $r23
-       \var    = 23
-       .endif
-       .ifc    \r, $r24
-       \var    = 24
-       .endif
-       .ifc    \r, $r25
-       \var    = 25
-       .endif
-       .ifc    \r, $r26
-       \var    = 26
-       .endif
-       .ifc    \r, $r27
-       \var    = 27
-       .endif
-       .ifc    \r, $r28
-       \var    = 28
-       .endif
-       .ifc    \r, $r29
-       \var    = 29
-       .endif
-       .ifc    \r, $r30
-       \var    = 30
-       .endif
-       .ifc    \r, $r31
-       \var    = 31
-       .endif
-       .iflt   \var
-       .error  "Unable to parse register name \r"
-       .endif
-       .endm
-
        .macro  cpu_save_nonscratch thread
        stptr.d s0, \thread, THREAD_REG23
        stptr.d s1, \thread, THREAD_REG24
 
        .macro fpu_save_csr thread tmp
        movfcsr2gr      \tmp, fcsr0
-       stptr.w \tmp, \thread, THREAD_FCSR
+       stptr.w         \tmp, \thread, THREAD_FCSR
+#ifdef CONFIG_CPU_HAS_LBT
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp, \tmp, FPU_CSR_TM
+       beqz            \tmp, 1f
+       /* Save FTOP */
+       x86mftop        \tmp
+       stptr.w         \tmp, \thread, THREAD_FTOP
+       /* Turn off TM to ensure the order of FPR in memory independent of TM */
+       x86clrtm
+1:
+#endif
        .endm
 
-       .macro fpu_restore_csr thread tmp
-       ldptr.w \tmp, \thread, THREAD_FCSR
-       movgr2fcsr      fcsr0, \tmp
+       .macro fpu_restore_csr thread tmp0 tmp1
+       ldptr.w         \tmp0, \thread, THREAD_FCSR
+       movgr2fcsr      fcsr0, \tmp0
+#ifdef CONFIG_CPU_HAS_LBT
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp0, \tmp0, FPU_CSR_TM
+       beqz            \tmp0, 2f
+       /* Restore FTOP */
+       ldptr.w         \tmp0, \thread, THREAD_FTOP
+       andi            \tmp0, \tmp0, 0x7
+       la.pcrel        \tmp1, 1f
+       alsl.d          \tmp1, \tmp0, \tmp1, 3
+       jr              \tmp1
+1:
+       x86mttop        0
+       b       2f
+       x86mttop        1
+       b       2f
+       x86mttop        2
+       b       2f
+       x86mttop        3
+       b       2f
+       x86mttop        4
+       b       2f
+       x86mttop        5
+       b       2f
+       x86mttop        6
+       b       2f
+       x86mttop        7
+2:
+#endif
        .endm
 
        .macro fpu_save_cc thread tmp0 tmp1
        .macro  lsx_restore_all thread tmp0 tmp1
        lsx_restore_data        \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
-       fpu_restore_csr         \thread, \tmp0
+       fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm
 
        .macro  lsx_save_upper vd base tmp off
        .macro  lasx_restore_all thread tmp0 tmp1
        lasx_restore_data       \thread, \tmp0
        fpu_restore_cc          \thread, \tmp0, \tmp1
-       fpu_restore_csr         \thread, \tmp0
+       fpu_restore_csr         \thread, \tmp0, \tmp1
        .endm
 
        .macro  lasx_save_upper xd base tmp off
diff --git a/arch/loongarch/include/asm/kasan.h b/arch/loongarch/include/asm/kasan.h
new file mode 100644 (file)
index 0000000..deeff81
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/mmzone.h>
+#include <asm/addrspace.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+#define __HAVE_ARCH_SHADOW_MAP
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#define KASAN_SHADOW_OFFSET    _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
+#define XRANGE_SHIFT (48)
+
+/* Valid address length */
+#define XRANGE_SHADOW_SHIFT    (PGDIR_SHIFT + PAGE_SHIFT - 3)
+/* Used for taking out the valid address */
+#define XRANGE_SHADOW_MASK     GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
+/* One segment whole address space size */
+#define XRANGE_SIZE            (XRANGE_SHADOW_MASK + 1)
+
+/* 64-bit segment value. */
+#define XKPRANGE_UC_SEG                (0x8000)
+#define XKPRANGE_CC_SEG                (0x9000)
+#define XKVRANGE_VC_SEG                (0xffff)
+
+/* Cached */
+#define XKPRANGE_CC_START              CACHE_BASE
+#define XKPRANGE_CC_SIZE               XRANGE_SIZE
+#define XKPRANGE_CC_KASAN_OFFSET       (0)
+#define XKPRANGE_CC_SHADOW_SIZE                (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_CC_SHADOW_END         (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE)
+
+/* UnCached */
+#define XKPRANGE_UC_START              UNCACHE_BASE
+#define XKPRANGE_UC_SIZE               XRANGE_SIZE
+#define XKPRANGE_UC_KASAN_OFFSET       XKPRANGE_CC_SHADOW_END
+#define XKPRANGE_UC_SHADOW_SIZE                (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_UC_SHADOW_END         (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
+
+/* VMALLOC (Cached or UnCached)  */
+#define XKVRANGE_VC_START              MODULES_VADDR
+#define XKVRANGE_VC_SIZE               round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
+#define XKVRANGE_VC_KASAN_OFFSET       XKPRANGE_UC_SHADOW_END
+#define XKVRANGE_VC_SHADOW_SIZE                (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKVRANGE_VC_SHADOW_END         (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
+
+/* KAsan shadow memory start right after vmalloc. */
+#define KASAN_SHADOW_START             round_up(KFENCE_AREA_END, PGDIR_SIZE)
+#define KASAN_SHADOW_SIZE              (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
+#define KASAN_SHADOW_END               round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
+
+#define XKPRANGE_CC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
+#define XKPRANGE_UC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
+#define XKVRANGE_VC_SHADOW_OFFSET      (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
+
+extern bool kasan_early_stage;
+extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
+
+#define kasan_arch_is_ready kasan_arch_is_ready
+static __always_inline bool kasan_arch_is_ready(void)
+{
+       return !kasan_early_stage;
+}
+
+static inline void *kasan_mem_to_shadow(const void *addr)
+{
+       if (!kasan_arch_is_ready()) {
+               return (void *)(kasan_early_shadow_page);
+       } else {
+               unsigned long maddr = (unsigned long)addr;
+               unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
+               unsigned long offset = 0;
+
+               maddr &= XRANGE_SHADOW_MASK;
+               switch (xrange) {
+               case XKPRANGE_CC_SEG:
+                       offset = XKPRANGE_CC_SHADOW_OFFSET;
+                       break;
+               case XKPRANGE_UC_SEG:
+                       offset = XKPRANGE_UC_SHADOW_OFFSET;
+                       break;
+               case XKVRANGE_VC_SEG:
+                       offset = XKVRANGE_VC_SHADOW_OFFSET;
+                       break;
+               default:
+                       WARN_ON(1);
+                       return NULL;
+               }
+
+               return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset);
+       }
+}
+
+static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
+{
+       unsigned long addr = (unsigned long)shadow_addr;
+
+       if (unlikely(addr > KASAN_SHADOW_END) ||
+               unlikely(addr < KASAN_SHADOW_START)) {
+               WARN_ON(1);
+               return NULL;
+       }
+
+       if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
+               return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
+       else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
+               return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
+       else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
+               return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START);
+       else {
+               WARN_ON(1);
+               return NULL;
+       }
+}
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#endif
+#endif
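
To make the segment-to-shadow translation above concrete, here is a worked example. The page size and page-table depth are assumptions (16 KiB pages, three levels), not part of the patch; with them, PGDIR_SHIFT = 36 and PAGE_SHIFT = 14, so XRANGE_SHADOW_SHIFT = 36 + 14 - 3 = 47, and a cached (XKPRANGE_CC) address maps as:

	addr   = 0x9000000012345678
	seg    = (addr >> 48) & 0xffff     = 0x9000  (XKPRANGE_CC_SEG)
	masked = addr & GENMASK_ULL(46, 0) = 0x0000000012345678
	shadow = (masked >> 3) + XKPRANGE_CC_SHADOW_OFFSET
	       = 0x0000000002468acf + KASAN_SHADOW_START

(XKPRANGE_CC_KASAN_OFFSET is 0, so the cached segment's shadow begins exactly at KASAN_SHADOW_START.)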
diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h
new file mode 100644 (file)
index 0000000..6c82aea
--- /dev/null
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KFENCE support for LoongArch.
+ *
+ * Author: Enze Li <lienze@kylinos.cn>
+ * Copyright (C) 2022-2023 KylinSoft Corporation.
+ */
+
+#ifndef _ASM_LOONGARCH_KFENCE_H
+#define _ASM_LOONGARCH_KFENCE_H
+
+#include <linux/kfence.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+static inline bool arch_kfence_init_pool(void)
+{
+       int err;
+       char *kfence_pool = __kfence_pool;
+       struct vm_struct *area;
+
+       area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
+                                   KFENCE_AREA_START, KFENCE_AREA_END,
+                                   __builtin_return_address(0));
+       if (!area)
+               return false;
+
+       __kfence_pool = (char *)area->addr;
+       err = ioremap_page_range((unsigned long)__kfence_pool,
+                                (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
+                                virt_to_phys((void *)kfence_pool), PAGE_KERNEL);
+       if (err) {
+               free_vm_area(area);
+               __kfence_pool = kfence_pool;
+               return false;
+       }
+
+       return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+       pte_t *pte = virt_to_kpte(addr);
+
+       if (WARN_ON(!pte) || pte_none(*pte))
+               return false;
+
+       if (protect)
+               set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
+       else
+               set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
+
+       preempt_disable();
+       local_flush_tlb_one(addr);
+       preempt_enable();
+
+       return true;
+}
+
+#endif /* _ASM_LOONGARCH_KFENCE_H */
diff --git a/arch/loongarch/include/asm/kgdb.h b/arch/loongarch/include/asm/kgdb.h
new file mode 100644 (file)
index 0000000..2041ae5
--- /dev/null
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_KGDB_H
+#define _ASM_LOONGARCH_KGDB_H
+
+#define GDB_SIZEOF_REG         sizeof(u64)
+
+/* The gdb remote protocol expects the following register layout. */
+
+/*
+ * General purpose registers:
+ *     r0-r31: 64 bit
+ *     orig_a0: 64 bit
+ *     pc : 64 bit
+ *     csr_badvaddr: 64 bit
+ */
+#define DBG_PT_REGS_BASE       0
+#define DBG_PT_REGS_NUM                35
+#define DBG_PT_REGS_END                (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1)
+
+/*
+ * Floating point registers:
+ *     f0-f31: 64 bit
+ */
+#define DBG_FPR_BASE           (DBG_PT_REGS_END + 1)
+#define DBG_FPR_NUM            32
+#define DBG_FPR_END            (DBG_FPR_BASE + DBG_FPR_NUM - 1)
+
+/*
+ * Condition Flag registers:
+ *     fcc0-fcc7: 8 bit
+ */
+#define DBG_FCC_BASE           (DBG_FPR_END + 1)
+#define DBG_FCC_NUM            8
+#define DBG_FCC_END            (DBG_FCC_BASE + DBG_FCC_NUM - 1)
+
+/*
+ * Floating-point Control and Status registers:
+ *     fcsr: 32 bit
+ */
+#define DBG_FCSR_NUM           1
+#define DBG_FCSR               (DBG_FCC_END + 1)
+
+#define DBG_MAX_REG_NUM                (DBG_FCSR + 1)
+
+/*
+ * Size of the I/O buffer for gdb packets, chosen to hold
+ * all register contents.
+ */
+#define BUFMAX                 2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * PT_REGS and FPR: 8 bytes each; FCSR: 4 bytes; FCC: 1 byte each.
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+#define NUMREGBYTES            ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4)
+
+#define BREAK_INSTR_SIZE       4
+#define CACHE_FLUSH_IS_SAFE    0
+
+/* Register numbers of various important registers. */
+enum dbg_loongarch_regnum {
+       DBG_LOONGARCH_ZERO = 0,
+       DBG_LOONGARCH_RA,
+       DBG_LOONGARCH_TP,
+       DBG_LOONGARCH_SP,
+       DBG_LOONGARCH_A0,
+       DBG_LOONGARCH_FP = 22,
+       DBG_LOONGARCH_S0,
+       DBG_LOONGARCH_S1,
+       DBG_LOONGARCH_S2,
+       DBG_LOONGARCH_S3,
+       DBG_LOONGARCH_S4,
+       DBG_LOONGARCH_S5,
+       DBG_LOONGARCH_S6,
+       DBG_LOONGARCH_S7,
+       DBG_LOONGARCH_S8,
+       DBG_LOONGARCH_ORIG_A0,
+       DBG_LOONGARCH_PC,
+       DBG_LOONGARCH_BADV
+};
+
+void kgdb_breakinst(void);
+void arch_kgdb_breakpoint(void);
+
+#ifdef CONFIG_KGDB
+bool kgdb_breakpoint_handler(struct pt_regs *regs);
+#else /* !CONFIG_KGDB */
+static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; }
+#endif /* CONFIG_KGDB */
+
+#endif /* __ASM_KGDB_H_ */
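
As a cross-check of the register layout above, the gdb_regs buffer size works out as follows (plain arithmetic from the constants in this header):

	NUMREGBYTES = (DBG_PT_REGS_NUM + DBG_FPR_NUM) * 8 + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4
	            = (35 + 32) * 8 + 8 * 1 + 1 * 4
	            = 548 bytes

which fits comfortably within the 2048-byte BUFMAX packet buffer even when hex-encoded at two characters per byte.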
diff --git a/arch/loongarch/include/asm/lbt.h b/arch/loongarch/include/asm/lbt.h
new file mode 100644 (file)
index 0000000..e671978
--- /dev/null
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_LBT_H
+#define _ASM_LBT_H
+
+#include <asm/cpu.h>
+#include <asm/current.h>
+#include <asm/loongarch.h>
+#include <asm/processor.h>
+
+extern void _init_lbt(void);
+extern void _save_lbt(struct loongarch_lbt *);
+extern void _restore_lbt(struct loongarch_lbt *);
+
+static inline int is_lbt_enabled(void)
+{
+       if (!cpu_has_lbt)
+               return 0;
+
+       return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ?
+               1 : 0;
+}
+
+static inline int is_lbt_owner(void)
+{
+       return test_thread_flag(TIF_USEDLBT);
+}
+
+#ifdef CONFIG_CPU_HAS_LBT
+
+static inline void enable_lbt(void)
+{
+       if (cpu_has_lbt)
+               csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void disable_lbt(void)
+{
+       if (cpu_has_lbt)
+               csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void __own_lbt(void)
+{
+       enable_lbt();
+       set_thread_flag(TIF_USEDLBT);
+       KSTK_EUEN(current) |= CSR_EUEN_LBTEN;
+}
+
+static inline void own_lbt_inatomic(int restore)
+{
+       if (cpu_has_lbt && !is_lbt_owner()) {
+               __own_lbt();
+               if (restore)
+                       _restore_lbt(&current->thread.lbt);
+       }
+}
+
+static inline void own_lbt(int restore)
+{
+       preempt_disable();
+       own_lbt_inatomic(restore);
+       preempt_enable();
+}
+
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk)
+{
+       if (cpu_has_lbt && is_lbt_owner()) {
+               if (save)
+                       _save_lbt(&tsk->thread.lbt);
+
+               disable_lbt();
+               clear_tsk_thread_flag(tsk, TIF_USEDLBT);
+       }
+       KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN);
+}
+
+static inline void lose_lbt(int save)
+{
+       preempt_disable();
+       lose_lbt_inatomic(save, current);
+       preempt_enable();
+}
+
+static inline void init_lbt(void)
+{
+       __own_lbt();
+       _init_lbt();
+}
+#else
+static inline void own_lbt_inatomic(int restore) {}
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {}
+static inline void init_lbt(void) {}
+static inline void lose_lbt(int save) {}
+#endif
+
+static inline int thread_lbt_context_live(void)
+{
+       if (!cpu_has_lbt)
+               return 0;
+
+       return test_thread_flag(TIF_LBT_CTX_LIVE);
+}
+
+#endif /* _ASM_LBT_H */
index 10748a20a2ab5593f5a19b5b2cbc5dba8ebdda2b..33531d432b492d201f98ae64b941255a94355f91 100644 (file)
 #ifndef __ASSEMBLY__
 #include <larchintrin.h>
 
-/*
- * parse_r var, r - Helper assembler macro for parsing register names.
- *
- * This converts the register name in $n form provided in \r to the
- * corresponding register number, which is assigned to the variable \var. It is
- * needed to allow explicit encoding of instructions in inline assembly where
- * registers are chosen by the compiler in $n form, allowing us to avoid using
- * fixed register numbers.
- *
- * It also allows newer instructions (not implemented by the assembler) to be
- * transparently implemented using assembler macros, instead of needing separate
- * cases depending on toolchain support.
- *
- * Simple usage example:
- * __asm__ __volatile__("parse_r addr, %0\n\t"
- *                     "#invtlb op, 0, %0\n\t"
- *                     ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)"
- *                     : "=r" (status);
- */
-
-/* Match an individual register number and assign to \var */
-#define _IFC_REG(n)                            \
-       ".ifc   \\r, $r" #n "\n\t"              \
-       "\\var  = " #n "\n\t"                   \
-       ".endif\n\t"
-
-__asm__(".macro        parse_r var r\n\t"
-       "\\var  = -1\n\t"
-       _IFC_REG(0)  _IFC_REG(1)  _IFC_REG(2)  _IFC_REG(3)
-       _IFC_REG(4)  _IFC_REG(5)  _IFC_REG(6)  _IFC_REG(7)
-       _IFC_REG(8)  _IFC_REG(9)  _IFC_REG(10) _IFC_REG(11)
-       _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
-       _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
-       _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
-       _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
-       _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
-       ".iflt  \\var\n\t"
-       ".error \"Unable to parse register name \\r\"\n\t"
-       ".endif\n\t"
-       ".endm");
-
-#undef _IFC_REG
-
 /* CPUCFG */
 #define read_cpucfg(reg) __cpucfg(reg)
 
@@ -1453,6 +1410,10 @@ __BUILD_CSR_OP(tlbidx)
 #define FPU_CSR_RU     0x200   /* towards +Infinity */
 #define FPU_CSR_RD     0x300   /* towards -Infinity */
 
+/* Bit 6 of the FPU Status Register specifies the LBT TOP simulation mode */
+#define FPU_CSR_TM_SHIFT       0x6
+#define FPU_CSR_TM             (_ULCAST_(1) << FPU_CSR_TM_SHIFT)
+
 #define read_fcsr(source)      \
 ({     \
        unsigned int __res;     \
index fe67d0b4b33ddce60d9b28e72b1c942b6bdca91c..2b9a90727e191224875e23f041dc3f454a2ed540 100644 (file)
@@ -13,6 +13,4 @@ extern struct pglist_data *node_data[];
 
 #define NODE_DATA(nid) (node_data[(nid)])
 
-extern void setup_zero_pages(void);
-
 #endif /* _ASM_MMZONE_H_ */
index 26e8dccb661909c2f306f2f002c730abdddaf5af..63f137ce82a41fc4e251f2adceef441e72dcd691 100644 (file)
@@ -84,7 +84,12 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define sym_to_pfn(x)          __phys_to_pfn(__pa_symbol(x))
 
 #define virt_to_pfn(kaddr)     PFN_DOWN(PHYSADDR(kaddr))
-#define virt_to_page(kaddr)    pfn_to_page(virt_to_pfn(kaddr))
+
+#define virt_to_page(kaddr)                                                            \
+({                                                                                     \
+       (likely((unsigned long)kaddr < vm_map_base)) ?                                  \
+       dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
+})
 
 extern int __virt_addr_valid(volatile void *kaddr);
 #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
index 23f5b1107246a3cb5e0db4ced632e9e6aff8bcfe..79470f0b4f1d8dfbd715e4d1dc2e711949175b30 100644 (file)
@@ -94,4 +94,5 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 #endif /* __PAGETABLE_PUD_FOLDED */
 
+extern pte_t * __init populate_kernel_pte(unsigned long addr);
 #endif /* _ASM_PGALLOC_H */
index 06963a172319da14c298cbeaca2c1f0beb80a5e8..29d9b12298bc843ecf24012d37f9042079d939e5 100644 (file)
@@ -70,12 +70,9 @@ struct vm_area_struct;
  * for zero-mapped memory areas etc..
  */
 
-extern unsigned long empty_zero_page;
-extern unsigned long zero_page_mask;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 
-#define ZERO_PAGE(vaddr) \
-       (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-#define __HAVE_COLOR_ZERO_PAGE
+#define ZERO_PAGE(vaddr)       virt_to_page(empty_zero_page)
 
 /*
  * TLB refill handlers may also map the vmalloc area into xkvrange.
@@ -85,14 +82,30 @@ extern unsigned long zero_page_mask;
 #define MODULES_VADDR  (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
 #define MODULES_END    (MODULES_VADDR + SZ_256M)
 
+#ifdef CONFIG_KFENCE
+#define KFENCE_AREA_SIZE       (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
+#else
+#define KFENCE_AREA_SIZE       0
+#endif
+
 #define VMALLOC_START  MODULES_END
+
+#ifndef CONFIG_KASAN
 #define VMALLOC_END    \
        (vm_map_base +  \
-        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#else
+#define VMALLOC_END    \
+       (vm_map_base +  \
+        min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#endif
 
 #define vmemmap                ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
 #define VMEMMAP_END    ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
 
+#define KFENCE_AREA_START      (VMEMMAP_END + 1)
+#define KFENCE_AREA_END                (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1)
+
 #define pte_ERROR(e) \
        pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
 #ifndef __PAGETABLE_PMD_FOLDED
@@ -350,6 +363,9 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *pt
 extern pgd_t swapper_pg_dir[];
 extern pgd_t invalid_pg_dir[];
 
+struct page *dmw_virt_to_page(unsigned long kaddr);
+struct page *tlb_virt_to_page(unsigned long kaddr);
+
 /*
  * The following only work if pte_present() is true.
  * Undefined behaviour if not..
@@ -596,6 +612,9 @@ static inline long pmd_protnone(pmd_t pmd)
 }
 #endif /* CONFIG_NUMA_BALANCING */
 
+#define pmd_leaf(pmd)          ((pmd_val(pmd) & _PAGE_HUGE) != 0)
+#define pud_leaf(pud)          ((pud_val(pud) & _PAGE_HUGE) != 0)
+
 /*
  * We provide our own get_unmapped area to cope with the virtual aliasing
  * constraints placed on us by the cache architecture.
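
To make the new virtual layout concrete: assuming the defaults of CONFIG_KFENCE_NUM_OBJECTS=255 and 16 KiB pages (assumptions, not part of the patch), the reserved KFENCE window is

	KFENCE_AREA_SIZE = ((255 + 1) * 2 + 2) * 16 KiB = 514 * 16 KiB = 8224 KiB

and the xkvrange order becomes MODULES -> VMALLOC -> vmemmap -> KFENCE area, with the KASAN shadow (when enabled) starting at KFENCE_AREA_END rounded up to a PGDIR_SIZE boundary.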
index 636e1c66398c17c7bfb930b5b225bd241f831a45..c3bc44b5f5b30b2d43b901da6c339c26594e08fc 100644 (file)
@@ -80,11 +80,22 @@ BUILD_FPR_ACCESS(32)
 BUILD_FPR_ACCESS(64)
 
 struct loongarch_fpu {
-       unsigned int    fcsr;
        uint64_t        fcc;    /* 8x8 */
+       uint32_t        fcsr;
+       uint32_t        ftop;
        union fpureg    fpr[NUM_FPU_REGS];
 };
 
+struct loongarch_lbt {
+       /* Scratch registers */
+       unsigned long scr0;
+       unsigned long scr1;
+       unsigned long scr2;
+       unsigned long scr3;
+       /* Eflags register */
+       unsigned long eflags;
+};
+
 #define INIT_CPUMASK { \
        {0,} \
 }
@@ -113,15 +124,6 @@ struct thread_struct {
        unsigned long csr_ecfg;
        unsigned long csr_badvaddr;     /* Last user fault */
 
-       /* Scratch registers */
-       unsigned long scr0;
-       unsigned long scr1;
-       unsigned long scr2;
-       unsigned long scr3;
-
-       /* Eflags register */
-       unsigned long eflags;
-
        /* Other stuff associated with the thread. */
        unsigned long trap_nr;
        unsigned long error_code;
@@ -133,6 +135,7 @@ struct thread_struct {
         * context because they are conditionally copied at fork().
         */
        struct loongarch_fpu fpu FPU_ALIGN;
+       struct loongarch_lbt lbt; /* Also conditionally copied */
 
        /* Hardware breakpoints pinned to this task. */
        struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
@@ -174,8 +177,9 @@ struct thread_struct {
         * FPU & vector registers                               \
         */                                                     \
        .fpu                    = {                             \
-               .fcsr           = 0,                            \
                .fcc            = 0,                            \
+               .fcsr           = 0,                            \
+               .ftop           = 0,                            \
                .fpr            = {{{0,},},},                   \
        },                                                      \
        .hbp_break              = {0},                          \
index be05c0e706a2e23d1e5c82859f02be62c82ce0b9..a0bc159ce8bdc0348defe27953c07d973dd58b5f 100644 (file)
@@ -7,6 +7,7 @@
 #define _LOONGARCH_SETUP_H
 
 #include <linux/types.h>
+#include <asm/sections.h>
 #include <uapi/asm/setup.h>
 
 #define VECSIZE 0x200
@@ -33,8 +34,13 @@ extern long __la_abs_end;
 extern long __rela_dyn_begin;
 extern long __rela_dyn_end;
 
-extern void * __init relocate_kernel(void);
+extern unsigned long __init relocate_kernel(void);
 
 #endif
 
+static inline unsigned long kaslr_offset(void)
+{
+       return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS;
+}
+
 #endif /* __SETUP_H */
index 7df80e6ae9d2c8d76af305b7478bd52776ed380a..4fb1e6408b982aec130c75413e04bb6d2fb6c52c 100644 (file)
        cfi_st  u0, PT_R21, \docfi
        csrrd   u0, PERCPU_BASE_KS
 9:
+#ifdef CONFIG_KGDB
+       li.w    t0, CSR_CRMD_WE
+       csrxchg t0, t0, LOONGARCH_CSR_CRMD
+#endif
        .endm
 
        .macro  SAVE_ALL docfi=0
index 7b29cc9c70aa617049a3790f8cff9632ff100a66..5bb5a90d26815133e46a9c8ca2f304350acac18e 100644 (file)
@@ -7,11 +7,31 @@
 
 #define __HAVE_ARCH_MEMSET
 extern void *memset(void *__s, int __c, size_t __count);
+extern void *__memset(void *__s, int __c, size_t __count);
 
 #define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+extern void *__memcpy(void *__to, __const__ void *__from, size_t __n);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+extern void *__memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use the non-instrumented versions of the mem* functions.
+ */
+
+#define memset(s, c, n) __memset(s, c, n)
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
 
 #endif /* _ASM_STRING_H */
index 24e3094bebab166c046264280ef9b5929b350377..5b225aff3ba21aa06d0713bc8e73e1b941389630 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 
 struct task_struct;
 
@@ -34,6 +35,7 @@ extern asmlinkage struct task_struct *__switch_to(struct task_struct *prev,
 #define switch_to(prev, next, last)                                            \
 do {                                                                           \
        lose_fpu_inatomic(1, prev);                                             \
+       lose_lbt_inatomic(1, prev);                                             \
        hw_breakpoint_thread_switch(next);                                      \
        (last) = __switch_to(prev, next, task_thread_info(next),                \
                 __builtin_return_address(0), __builtin_frame_address(0));      \
index 1a3354ca056e9af11e15bfe80ffce5ad97c9e640..8cb653d49a54343ebfa26814e037ddf844c98351 100644 (file)
@@ -84,6 +84,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define TIF_SINGLESTEP         16      /* Single Step */
 #define TIF_LSX_CTX_LIVE       17      /* LSX context must be preserved */
 #define TIF_LASX_CTX_LIVE      18      /* LASX context must be preserved */
+#define TIF_USEDLBT            19      /* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE       20      /* LBT context must be preserved */
 
 #define _TIF_SIGPENDING                (1<<TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED      (1<<TIF_NEED_RESCHED)
@@ -101,6 +103,8 @@ register unsigned long current_stack_pointer __asm__("$sp");
 #define _TIF_SINGLESTEP                (1<<TIF_SINGLESTEP)
 #define _TIF_LSX_CTX_LIVE      (1<<TIF_LSX_CTX_LIVE)
 #define _TIF_LASX_CTX_LIVE     (1<<TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT           (1<<TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE      (1<<TIF_LBT_CTX_LIVE)
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/loongarch/include/asm/xor.h b/arch/loongarch/include/asm/xor.h
new file mode 100644 (file)
index 0000000..12467ff
--- /dev/null
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_H
+#define _ASM_LOONGARCH_XOR_H
+
+#include <asm/cpu-features.h>
+#include <asm/xor_simd.h>
+
+#ifdef CONFIG_CPU_HAS_LSX
+static struct xor_block_template xor_block_lsx = {
+       .name = "lsx",
+       .do_2 = xor_lsx_2,
+       .do_3 = xor_lsx_3,
+       .do_4 = xor_lsx_4,
+       .do_5 = xor_lsx_5,
+};
+
+#define XOR_SPEED_LSX()                                        \
+       do {                                            \
+               if (cpu_has_lsx)                        \
+                       xor_speed(&xor_block_lsx);      \
+       } while (0)
+#else /* CONFIG_CPU_HAS_LSX */
+#define XOR_SPEED_LSX()
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static struct xor_block_template xor_block_lasx = {
+       .name = "lasx",
+       .do_2 = xor_lasx_2,
+       .do_3 = xor_lasx_3,
+       .do_4 = xor_lasx_4,
+       .do_5 = xor_lasx_5,
+};
+
+#define XOR_SPEED_LASX()                                       \
+       do {                                                    \
+               if (cpu_has_lasx)                               \
+                       xor_speed(&xor_block_lasx);             \
+       } while (0)
+#else /* CONFIG_CPU_HAS_LASX */
+#define XOR_SPEED_LASX()
+#endif /* CONFIG_CPU_HAS_LASX */
+
+/*
+ * For grins, also test the generic routines.
+ *
+ * More importantly: it cannot be ruled out at this point of time, that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES                              \
+do {                                                   \
+       xor_speed(&xor_block_8regs);                    \
+       xor_speed(&xor_block_8regs_p);                  \
+       xor_speed(&xor_block_32regs);                   \
+       xor_speed(&xor_block_32regs_p);                 \
+       XOR_SPEED_LSX();                                \
+       XOR_SPEED_LASX();                               \
+} while (0)
+
+#endif /* _ASM_LOONGARCH_XOR_H */
diff --git a/arch/loongarch/include/asm/xor_simd.h b/arch/loongarch/include/asm/xor_simd.h
new file mode 100644 (file)
index 0000000..471b963
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_SIMD_H
+#define _ASM_LOONGARCH_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2);
+void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4);
+void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2);
+void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4);
+void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+               const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+               const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
index 06e3be52cb0427c91e03ac2fdddcb85b28ca860c..ac915f84165053964105383889e4d8f48c72f741 100644 (file)
@@ -56,6 +56,12 @@ struct user_lasx_state {
        uint64_t vregs[32*4];
 };
 
+struct user_lbt_state {
+       uint64_t scr[4];
+       uint32_t eflags;
+       uint32_t ftop;
+};
+
 struct user_watch_state {
        uint64_t dbg_info;
        struct {
index 4cd7d16f70377586a1d209dca634367c89713ffa..6c22f616b8f15d746316a62468a9a9a22c736443 100644 (file)
@@ -59,4 +59,14 @@ struct lasx_context {
        __u32   fcsr;
 };
 
+/* LBT context */
+#define LBT_CTX_MAGIC          0x42540001
+#define LBT_CTX_ALIGN          8
+struct lbt_context {
+       __u64   regs[4];
+       __u32   eflags;
+       __u32   ftop;
+};
+
+
 #endif /* _UAPI_ASM_SIGCONTEXT_H */
index 8e279f04f9e7a8031784fc46f0a8a41148e1337b..c56ea0b7544899e2b7ccf6aee30c8ae40fe350d0 100644 (file)
@@ -15,6 +15,8 @@ obj-$(CONFIG_EFI)             += efi.o
 
 obj-$(CONFIG_CPU_HAS_FPU)      += fpu.o kfpu.o
 
+obj-$(CONFIG_CPU_HAS_LBT)      += lbt.o
+
 obj-$(CONFIG_ARCH_STRICT_ALIGN)        += unaligned.o
 
 ifdef CONFIG_FUNCTION_TRACER
@@ -32,6 +34,12 @@ ifdef CONFIG_FUNCTION_TRACER
   CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE)
 endif
 
+KASAN_SANITIZE_efi.o := n
+KASAN_SANITIZE_cpu-probe.o := n
+KASAN_SANITIZE_traps.o := n
+KASAN_SANITIZE_smp.o := n
+KASAN_SANITIZE_vdso.o := n
+
 obj-$(CONFIG_MODULES)          += module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)       += stacktrace.o
 
@@ -54,6 +62,7 @@ obj-$(CONFIG_UNWINDER_PROLOGUE) += unwind_prologue.o
 obj-$(CONFIG_PERF_EVENTS)      += perf_event.o perf_regs.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)       += hw_breakpoint.o
 
+obj-$(CONFIG_KGDB)             += kgdb.o
 obj-$(CONFIG_KPROBES)          += kprobes.o
 obj-$(CONFIG_RETHOOK)          += rethook.o rethook_trampoline.o
 obj-$(CONFIG_UPROBES)          += uprobes.o
index 505e4bf59603148ba85ab979073b36933e6a9927..8da0726777edb41ea66d47f640308c18435f4551 100644 (file)
@@ -118,13 +118,6 @@ void output_thread_defines(void)
        OFFSET(THREAD_CSRECFG, task_struct,
               thread.csr_ecfg);
 
-       OFFSET(THREAD_SCR0, task_struct, thread.scr0);
-       OFFSET(THREAD_SCR1, task_struct, thread.scr1);
-       OFFSET(THREAD_SCR2, task_struct, thread.scr2);
-       OFFSET(THREAD_SCR3, task_struct, thread.scr3);
-
-       OFFSET(THREAD_EFLAGS, task_struct, thread.eflags);
-
        OFFSET(THREAD_FPU, task_struct, thread.fpu);
 
        OFFSET(THREAD_BVADDR, task_struct, \
@@ -172,6 +165,17 @@ void output_thread_fpu_defines(void)
 
        OFFSET(THREAD_FCSR, loongarch_fpu, fcsr);
        OFFSET(THREAD_FCC,  loongarch_fpu, fcc);
+       OFFSET(THREAD_FTOP, loongarch_fpu, ftop);
+       BLANK();
+}
+
+void output_thread_lbt_defines(void)
+{
+       OFFSET(THREAD_SCR0,  loongarch_lbt, scr0);
+       OFFSET(THREAD_SCR1,  loongarch_lbt, scr1);
+       OFFSET(THREAD_SCR2,  loongarch_lbt, scr2);
+       OFFSET(THREAD_SCR3,  loongarch_lbt, scr3);
+       OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags);
        BLANK();
 }
 
index e925579c7a71eab8822ff683dd8444f8982e42d6..55320813ee0819f0f23429361b7fc9722b710d65 100644 (file)
@@ -144,6 +144,20 @@ static void cpu_probe_common(struct cpuinfo_loongarch *c)
                c->options |= LOONGARCH_CPU_LVZ;
                elf_hwcap |= HWCAP_LOONGARCH_LVZ;
        }
+#ifdef CONFIG_CPU_HAS_LBT
+       if (config & CPUCFG2_X86BT) {
+               c->options |= LOONGARCH_CPU_LBT_X86;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_X86;
+       }
+       if (config & CPUCFG2_ARMBT) {
+               c->options |= LOONGARCH_CPU_LBT_ARM;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM;
+       }
+       if (config & CPUCFG2_MIPSBT) {
+               c->options |= LOONGARCH_CPU_LBT_MIPS;
+               elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS;
+       }
+#endif
 
        config = read_cpucfg(LOONGARCH_CPUCFG6);
        if (config & CPUCFG6_PMP)
index d737e3cf42d3fd8ca882e08113d58d2887db32c1..65518bb8f47285e406b95f8ba4c63a29e2c23a4a 100644 (file)
@@ -58,6 +58,11 @@ SYM_FUNC_START(handle_syscall)
 
        SAVE_STATIC
 
+#ifdef CONFIG_KGDB
+       li.w            t1, CSR_CRMD_WE
+       csrxchg         t1, t1, LOONGARCH_CSR_CRMD
+#endif
+
        move            u0, t0
        li.d            tp, ~_THREAD_MASK
        and             tp, tp, sp
index 501094a09f5d89da2af50522b6c0fc9a9b65ec95..d53ab10f464465e3f88910614afefce92b5af607 100644 (file)
@@ -22,7 +22,7 @@
 
        .macro  EX insn, reg, src, offs
 .ex\@: \insn   \reg, \src, \offs
-       _asm_extable .ex\@, fault
+       _asm_extable .ex\@, .L_fpu_fault
        .endm
 
        .macro sc_save_fp base
        .macro sc_save_fcsr base, tmp0
        movfcsr2gr      \tmp0, fcsr0
        EX      st.w    \tmp0, \base, 0
+#if defined(CONFIG_CPU_HAS_LBT)
+       /* TM bit is always 0 if LBT not supported */
+       andi            \tmp0, \tmp0, FPU_CSR_TM
+       beqz            \tmp0, 1f
+       x86clrtm
+1:
+#endif
        .endm
 
        .macro sc_restore_fcsr base, tmp0
@@ -309,7 +316,7 @@ EXPORT_SYMBOL(_save_fp)
  */
 SYM_FUNC_START(_restore_fp)
        fpu_restore_double      a0 t1           # clobbers t1
-       fpu_restore_csr         a0 t1
+       fpu_restore_csr         a0 t1 t2
        fpu_restore_cc          a0 t1 t2        # clobbers t1, t2
        jr                      ra
 SYM_FUNC_END(_restore_fp)
@@ -514,7 +521,6 @@ SYM_FUNC_START(_restore_lasx_context)
        jr      ra
 SYM_FUNC_END(_restore_lasx_context)
 
-SYM_FUNC_START(fault)
+.L_fpu_fault:
        li.w    a0, -EFAULT                             # failure
        jr      ra
-SYM_FUNC_END(fault)
index 5e828a8bc0a0e0b95006449d2a1ab10b14ca2d4a..53b883db0786207318b2cfc6857a05857b9dd1a9 100644 (file)
@@ -95,12 +95,17 @@ SYM_CODE_START(kernel_entry)                        # kernel entry point
        PTR_LI          sp, (_THREAD_SIZE - PT_SIZE)
        PTR_ADD         sp, sp, tp
        set_saved_sp    sp, t0, t1
-#endif
 
-       /* relocate_kernel() returns the new kernel entry point */
-       jr              a0
-       ASM_BUG()
+       /* Jump to the new kernel: new_pc = current_pc + random_offset */
+       pcaddi          t0, 0
+       add.d           t0, t0, a0
+       jirl            zero, t0, 0xc
+#endif /* CONFIG_RANDOMIZE_BASE */
+
+#endif /* CONFIG_RELOCATABLE */
 
+#ifdef CONFIG_KASAN
+       bl              kasan_early_init
 #endif
 
        bl              start_kernel
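
Since relocate_kernel() now returns the random offset in a0 (see the relocate.c hunk later in this merge), the three-instruction sequence above computes the jump target by hand: pcaddi loads the address of the pcaddi itself, add.d applies the offset, and the jirl immediate of 0xc (three 4-byte instructions) lands just past the sequence inside the relocated image. A small C model of the arithmetic, for illustration only:

/* Illustrative model of the KASLR jump above */
unsigned long kaslr_jump_target(unsigned long pcaddi_addr, unsigned long random_offset)
{
        /* relocated copy of the pcaddi, plus 12 bytes to skip pcaddi/add.d/jirl */
        return (pcaddi_addr + random_offset) + 0xc;
}
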
index 5c46ae8c6cac13b433ccbdfbdacf194eba6d0d19..ec5b28e570c963482d18e50f28043b066a425ffc 100644 (file)
@@ -8,19 +8,40 @@
 #include <asm/fpu.h>
 #include <asm/smp.h>
 
+static unsigned int euen_mask = CSR_EUEN_FPEN;
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example.
+ */
 static DEFINE_PER_CPU(bool, in_kernel_fpu);
+static DEFINE_PER_CPU(unsigned int, euen_current);
 
 void kernel_fpu_begin(void)
 {
+       unsigned int *euen_curr;
+
        preempt_disable();
 
        WARN_ON(this_cpu_read(in_kernel_fpu));
 
        this_cpu_write(in_kernel_fpu, true);
+       euen_curr = this_cpu_ptr(&euen_current);
 
-       if (!is_fpu_owner())
-               enable_fpu();
+       *euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+       if (*euen_curr & CSR_EUEN_LASXEN)
+               _save_lasx(&current->thread.fpu);
+       else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       if (*euen_curr & CSR_EUEN_LSXEN)
+               _save_lsx(&current->thread.fpu);
        else
+#endif
+       if (*euen_curr & CSR_EUEN_FPEN)
                _save_fp(&current->thread.fpu);
 
        write_fcsr(LOONGARCH_FCSR0, 0);
@@ -29,15 +50,41 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin);
 
 void kernel_fpu_end(void)
 {
+       unsigned int *euen_curr;
+
        WARN_ON(!this_cpu_read(in_kernel_fpu));
 
-       if (!is_fpu_owner())
-               disable_fpu();
+       euen_curr = this_cpu_ptr(&euen_current);
+
+#ifdef CONFIG_CPU_HAS_LASX
+       if (*euen_curr & CSR_EUEN_LASXEN)
+               _restore_lasx(&current->thread.fpu);
        else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       if (*euen_curr & CSR_EUEN_LSXEN)
+               _restore_lsx(&current->thread.fpu);
+       else
+#endif
+       if (*euen_curr & CSR_EUEN_FPEN)
                _restore_fp(&current->thread.fpu);
 
+       *euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN);
+
        this_cpu_write(in_kernel_fpu, false);
 
        preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
+static int __init init_euen_mask(void)
+{
+       if (cpu_has_lsx)
+               euen_mask |= CSR_EUEN_LSXEN;
+
+       if (cpu_has_lasx)
+               euen_mask |= CSR_EUEN_LASXEN;
+
+       return 0;
+}
+arch_initcall(init_euen_mask);
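
The comment above stresses that the kernel_fpu_begin()/kernel_fpu_end() critical section is not reentrant. A hedged sketch of a typical caller, with do_simd_xor() standing in for any routine that clobbers FP/LSX/LASX registers (the helper is hypothetical; the begin/end API is the one exported above, with declarations assumed to come from <asm/fpu.h> as for the in-tree users):

#include <linux/types.h>
#include <asm/fpu.h>

void do_simd_xor(unsigned long *dst, const unsigned long *src, size_t words); /* hypothetical */

static void xor_buffers(unsigned long *dst, const unsigned long *src, size_t words)
{
        kernel_fpu_begin();             /* saves any live FP/LSX/LASX state, enables it for kernel use */
        do_simd_xor(dst, src, words);   /* vector registers may be clobbered freely in here */
        kernel_fpu_end();               /* restores the saved state of the interrupted context */
}
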
diff --git a/arch/loongarch/kernel/kgdb.c b/arch/loongarch/kernel/kgdb.c
new file mode 100644 (file)
index 0000000..445c452
--- /dev/null
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch KGDB support
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <linux/processor.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fpu.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/inst.h>
+#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+
+int kgdb_watch_activated;
+static unsigned int stepped_opcode;
+static unsigned long stepped_address;
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+       { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) },
+       { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) },
+       { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) },
+       { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) },
+       { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) },
+       { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) },
+       { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) },
+       { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) },
+       { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) },
+       { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) },
+       { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) },
+       { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) },
+       { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) },
+       { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) },
+       { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) },
+       { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) },
+       { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) },
+       { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) },
+       { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) },
+       { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) },
+       { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) },
+       { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) },
+       { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) },
+       { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) },
+       { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) },
+       { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) },
+       { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) },
+       { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) },
+       { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) },
+       { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) },
+       { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) },
+       { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) },
+       { "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) },
+       { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) },
+       { "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) },
+       { "f0", GDB_SIZEOF_REG, 0 },
+       { "f1", GDB_SIZEOF_REG, 1 },
+       { "f2", GDB_SIZEOF_REG, 2 },
+       { "f3", GDB_SIZEOF_REG, 3 },
+       { "f4", GDB_SIZEOF_REG, 4 },
+       { "f5", GDB_SIZEOF_REG, 5 },
+       { "f6", GDB_SIZEOF_REG, 6 },
+       { "f7", GDB_SIZEOF_REG, 7 },
+       { "f8", GDB_SIZEOF_REG, 8 },
+       { "f9", GDB_SIZEOF_REG, 9 },
+       { "f10", GDB_SIZEOF_REG, 10 },
+       { "f11", GDB_SIZEOF_REG, 11 },
+       { "f12", GDB_SIZEOF_REG, 12 },
+       { "f13", GDB_SIZEOF_REG, 13 },
+       { "f14", GDB_SIZEOF_REG, 14 },
+       { "f15", GDB_SIZEOF_REG, 15 },
+       { "f16", GDB_SIZEOF_REG, 16 },
+       { "f17", GDB_SIZEOF_REG, 17 },
+       { "f18", GDB_SIZEOF_REG, 18 },
+       { "f19", GDB_SIZEOF_REG, 19 },
+       { "f20", GDB_SIZEOF_REG, 20 },
+       { "f21", GDB_SIZEOF_REG, 21 },
+       { "f22", GDB_SIZEOF_REG, 22 },
+       { "f23", GDB_SIZEOF_REG, 23 },
+       { "f24", GDB_SIZEOF_REG, 24 },
+       { "f25", GDB_SIZEOF_REG, 25 },
+       { "f26", GDB_SIZEOF_REG, 26 },
+       { "f27", GDB_SIZEOF_REG, 27 },
+       { "f28", GDB_SIZEOF_REG, 28 },
+       { "f29", GDB_SIZEOF_REG, 29 },
+       { "f30", GDB_SIZEOF_REG, 30 },
+       { "f31", GDB_SIZEOF_REG, 31 },
+       { "fcc0", 1, 0 },
+       { "fcc1", 1, 1 },
+       { "fcc2", 1, 2 },
+       { "fcc3", 1, 3 },
+       { "fcc4", 1, 4 },
+       { "fcc5", 1, 5 },
+       { "fcc6", 1, 6 },
+       { "fcc7", 1, 7 },
+       { "fcsr", 4, 0 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+       int reg_offset, reg_size;
+
+       if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+               return NULL;
+
+       reg_offset = dbg_reg_def[regno].offset;
+       reg_size = dbg_reg_def[regno].size;
+
+       if (reg_offset == -1)
+               goto out;
+
+       /* Handle general-purpose/orig_a0/pc/badv registers */
+       if (regno <= DBG_PT_REGS_END) {
+               memcpy(mem, (void *)regs + reg_offset, reg_size);
+               goto out;
+       }
+
+       if (!(regs->csr_euen & CSR_EUEN_FPEN))
+               goto out;
+
+       save_fp(current);
+
+       /* Handle FP registers */
+       switch (regno) {
+       case DBG_FCSR:                          /* Process the fcsr */
+               memcpy(mem, (void *)&current->thread.fpu.fcsr, reg_size);
+               break;
+       case DBG_FCC_BASE ... DBG_FCC_END:      /* Process the fcc */
+               memcpy(mem, (void *)&current->thread.fpu.fcc + reg_offset, reg_size);
+               break;
+       case DBG_FPR_BASE ... DBG_FPR_END:      /* Process the fpr */
+               memcpy(mem, (void *)&current->thread.fpu.fpr[reg_offset], reg_size);
+               break;
+       default:
+               break;
+       }
+
+out:
+       return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+       int reg_offset, reg_size;
+
+       if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+               return -EINVAL;
+
+       reg_offset = dbg_reg_def[regno].offset;
+       reg_size = dbg_reg_def[regno].size;
+
+       if (reg_offset == -1)
+               return 0;
+
+       /* Handle general-purpose/orig_a0/pc/badv registers */
+       if (regno <= DBG_PT_REGS_END) {
+               memcpy((void *)regs + reg_offset, mem, reg_size);
+               return 0;
+       }
+
+       if (!(regs->csr_euen & CSR_EUEN_FPEN))
+               return 0;
+
+       /* Handle FP registers */
+       switch (regno) {
+       case DBG_FCSR:                          /* Process the fcsr */
+               memcpy((void *)&current->thread.fpu.fcsr, mem, reg_size);
+               break;
+       case DBG_FCC_BASE ... DBG_FCC_END:      /* Process the fcc */
+               memcpy((void *)&current->thread.fpu.fcc + reg_offset, mem, reg_size);
+               break;
+       case DBG_FPR_BASE ... DBG_FPR_END:      /* Process the fpr */
+               memcpy((void *)&current->thread.fpu.fpr[reg_offset], mem, reg_size);
+               break;
+       default:
+               break;
+       }
+
+       restore_fp(current);
+
+       return 0;
+}
+
+/*
+ * Similar to regs_to_gdb_regs(), except that the process is sleeping so
+ * we may not be able to get all the info.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+       /* Initialize to zero */
+       memset((char *)gdb_regs, 0, NUMREGBYTES);
+
+       gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01;
+       gdb_regs[DBG_LOONGARCH_TP] = (long)p;
+       gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03;
+
+       /* S0 - S8 */
+       gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23;
+       gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24;
+       gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25;
+       gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26;
+       gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27;
+       gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28;
+       gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29;
+       gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30;
+       gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31;
+
+       /*
+        * PC uses the return address (RA), i.e. the moment after return from __switch_to()
+        */
+       gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+       regs->csr_era = pc;
+}
+
+void arch_kgdb_breakpoint(void)
+{
+       __asm__ __volatile__ (                  \
+               ".globl kgdb_breakinst\n\t"     \
+               "nop\n"                         \
+               "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */
+}
+
+/*
+ * Called via the die notifier chain before the kernel dies. If KGDB is
+ * enabled, then try to fall into the debugger.
+ */
+static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+       struct die_args *args = (struct die_args *)ptr;
+       struct pt_regs *regs = args->regs;
+
+       /* Userspace events, ignore. */
+       if (user_mode(regs))
+               return NOTIFY_DONE;
+
+       if (!kgdb_io_module_registered)
+               return NOTIFY_DONE;
+
+       if (atomic_read(&kgdb_active) != -1)
+               kgdb_nmicallback(smp_processor_id(), regs);
+
+       if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs))
+               return NOTIFY_DONE;
+
+       if (atomic_read(&kgdb_setting_breakpoint))
+               if (regs->csr_era == (unsigned long)&kgdb_breakinst)
+                       regs->csr_era += LOONGARCH_INSN_SIZE;
+
+       return NOTIFY_STOP;
+}
+
+bool kgdb_breakpoint_handler(struct pt_regs *regs)
+{
+       struct die_args args = {
+               .regs   = regs,
+               .str    = "Break",
+               .err    = BRK_KDB,
+               .trapnr = read_csr_excode(),
+               .signr  = SIGTRAP,
+       };
+
+       return (kgdb_loongarch_notify(NULL, DIE_TRAP, &args) == NOTIFY_STOP) ? true : false;
+}
+
+static struct notifier_block kgdb_notifier = {
+       .notifier_call = kgdb_loongarch_notify,
+};
+
+static inline void kgdb_arch_update_addr(struct pt_regs *regs,
+                                        char *remcom_in_buffer)
+{
+       unsigned long addr;
+       char *ptr;
+
+       ptr = &remcom_in_buffer[1];
+       if (kgdb_hex2long(&ptr, &addr))
+               regs->csr_era = addr;
+}
+
+/* Calculate the address execution will reach after a single step */
+static int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
+{
+       char cj_val;
+       unsigned int si, si_l, si_h, rd, rj, cj;
+       unsigned long pc = instruction_pointer(regs);
+       union loongarch_instruction *ip = (union loongarch_instruction *)pc;
+
+       if (pc & 3) {
+               pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+               return -EINVAL;
+       }
+
+       *next_addr = pc + LOONGARCH_INSN_SIZE;
+
+       si_h = ip->reg0i26_format.immediate_h;
+       si_l = ip->reg0i26_format.immediate_l;
+       switch (ip->reg0i26_format.opcode) {
+       case b_op:
+               *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+               return 0;
+       case bl_op:
+               *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+               regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
+               return 0;
+       }
+
+       rj = ip->reg1i21_format.rj;
+       cj = (rj & 0x07) + DBG_FCC_BASE;
+       si_l = ip->reg1i21_format.immediate_l;
+       si_h = ip->reg1i21_format.immediate_h;
+       dbg_get_reg(cj, &cj_val, regs);
+       switch (ip->reg1i21_format.opcode) {
+       case beqz_op:
+               if (regs->regs[rj] == 0)
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       case bnez_op:
+               if (regs->regs[rj] != 0)
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       case bceqz_op: /* bceqz_op = bcnez_op */
+               if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */
+                       *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+               return 0;
+       }
+
+       rj = ip->reg2i16_format.rj;
+       rd = ip->reg2i16_format.rd;
+       si = ip->reg2i16_format.immediate;
+       switch (ip->reg2i16_format.opcode) {
+       case beq_op:
+               if (regs->regs[rj] == regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bne_op:
+               if (regs->regs[rj] != regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case blt_op:
+               if ((long)regs->regs[rj] < (long)regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bge_op:
+               if ((long)regs->regs[rj] >= (long)regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bltu_op:
+               if (regs->regs[rj] < regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case bgeu_op:
+               if (regs->regs[rj] >= regs->regs[rd])
+                       *next_addr = pc + sign_extend64(si << 2, 17);
+               return 0;
+       case jirl_op:
+               regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
+               *next_addr = regs->regs[rj] + sign_extend64(si << 2, 17);
+               return 0;
+       }
+
+       return 0;
+}
+
+static int do_single_step(struct pt_regs *regs)
+{
+       int error = 0;
+       unsigned long addr = 0; /* Address the stepped instruction will branch or fall through to */
+
+       error = get_step_address(regs, &addr);
+       if (error)
+               return error;
+
+       /* Save the original opcode found at the address being stepped over */
+       error = get_kernel_nofault(stepped_opcode, (void *)addr);
+       if (error)
+               return error;
+
+       stepped_address = addr;
+
+       /* Replace the opcode with the break instruction */
+       error = copy_to_kernel_nofault((void *)stepped_address,
+                                      arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+       flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
+
+       if (error) {
+               stepped_opcode = 0;
+               stepped_address = 0;
+       } else {
+               kgdb_single_step = 1;
+               atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+       }
+
+       return error;
+}
+
+/* Undo a single step */
+static void undo_single_step(struct pt_regs *regs)
+{
+       if (stepped_opcode) {
+               copy_to_kernel_nofault((void *)stepped_address,
+                                      (void *)&stepped_opcode, BREAK_INSTR_SIZE);
+               flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE);
+       }
+
+       stepped_opcode = 0;
+       stepped_address = 0;
+       kgdb_single_step = 0;
+       atomic_set(&kgdb_cpu_doing_single_step, -1);
+}
+
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+                              char *remcom_in_buffer, char *remcom_out_buffer,
+                              struct pt_regs *regs)
+{
+       int ret = 0;
+
+       undo_single_step(regs);
+       regs->csr_prmd |= CSR_PRMD_PWE;
+
+       switch (remcom_in_buffer[0]) {
+       case 'D':
+       case 'k':
+               regs->csr_prmd &= ~CSR_PRMD_PWE;
+               fallthrough;
+       case 'c':
+               kgdb_arch_update_addr(regs, remcom_in_buffer);
+               break;
+       case 's':
+               kgdb_arch_update_addr(regs, remcom_in_buffer);
+               ret = do_single_step(regs);
+               break;
+       default:
+               ret = -1;
+       }
+
+       return ret;
+}
+
+static struct hw_breakpoint {
+       unsigned int            enabled;
+       unsigned long           addr;
+       int                     len;
+       int                     type;
+       struct perf_event       * __percpu *pev;
+} breakinfo[LOONGARCH_MAX_BRP];
+
+static int hw_break_reserve_slot(int breakno)
+{
+       int cpu, cnt = 0;
+       struct perf_event **pevent;
+
+       for_each_online_cpu(cpu) {
+               cnt++;
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               if (dbg_reserve_bp_slot(*pevent))
+                       goto fail;
+       }
+
+       return 0;
+
+fail:
+       for_each_online_cpu(cpu) {
+               cnt--;
+               if (!cnt)
+                       break;
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               dbg_release_bp_slot(*pevent);
+       }
+
+       return -1;
+}
+
+static int hw_break_release_slot(int breakno)
+{
+       int cpu;
+       struct perf_event **pevent;
+
+       if (dbg_is_early)
+               return 0;
+
+       for_each_online_cpu(cpu) {
+               pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+               if (dbg_release_bp_slot(*pevent))
+                       /*
+                        * The debugger is responsible for handling the retry on
+                        * remove failure.
+                        */
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+       int i;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+               if (!breakinfo[i].enabled)
+                       break;
+
+       if (i == LOONGARCH_MAX_BRP)
+               return -1;
+
+       switch (bptype) {
+       case BP_HARDWARE_BREAKPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_X;
+               break;
+       case BP_READ_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_R;
+               break;
+       case BP_WRITE_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_W;
+               break;
+       case BP_ACCESS_WATCHPOINT:
+               breakinfo[i].type = HW_BREAKPOINT_RW;
+               break;
+       default:
+               return -1;
+       }
+
+       switch (len) {
+       case 1:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_1;
+               break;
+       case 2:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_2;
+               break;
+       case 4:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_4;
+               break;
+       case 8:
+               breakinfo[i].len = HW_BREAKPOINT_LEN_8;
+               break;
+       default:
+               return -1;
+       }
+
+       breakinfo[i].addr = addr;
+       if (hw_break_reserve_slot(i)) {
+               breakinfo[i].addr = 0;
+               return -1;
+       }
+       breakinfo[i].enabled = 1;
+
+       return 0;
+}
+
+static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+       int i;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+               if (breakinfo[i].addr == addr && breakinfo[i].enabled)
+                       break;
+
+       if (i == LOONGARCH_MAX_BRP)
+               return -1;
+
+       if (hw_break_release_slot(i)) {
+               pr_err("Cannot remove hw breakpoint at %lx\n", addr);
+               return -1;
+       }
+       breakinfo[i].enabled = 0;
+
+       return 0;
+}
+
+static void kgdb_disable_hw_break(struct pt_regs *regs)
+{
+       int i;
+       int cpu = raw_smp_processor_id();
+       struct perf_event *bp;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               if (bp->attr.disabled == 1)
+                       continue;
+
+               arch_uninstall_hw_breakpoint(bp);
+               bp->attr.disabled = 1;
+       }
+
+       /* Disable hardware debugging while we are in kgdb */
+       csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+}
+
+static void kgdb_remove_all_hw_break(void)
+{
+       int i;
+       int cpu = raw_smp_processor_id();
+       struct perf_event *bp;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               if (!bp->attr.disabled) {
+                       arch_uninstall_hw_breakpoint(bp);
+                       bp->attr.disabled = 1;
+                       continue;
+               }
+
+               if (hw_break_release_slot(i))
+                       pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr);
+               breakinfo[i].enabled = 0;
+       }
+
+       csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+       kgdb_watch_activated = 0;
+}
+
+static void kgdb_correct_hw_break(void)
+{
+       int i, activated = 0;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               struct perf_event *bp;
+               int val;
+               int cpu = raw_smp_processor_id();
+
+               if (!breakinfo[i].enabled)
+                       continue;
+
+               bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+               if (bp->attr.disabled != 1)
+                       continue;
+
+               bp->attr.bp_addr = breakinfo[i].addr;
+               bp->attr.bp_len = breakinfo[i].len;
+               bp->attr.bp_type = breakinfo[i].type;
+
+               val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp));
+               if (val)
+                       return;
+
+               val = arch_install_hw_breakpoint(bp);
+               if (!val)
+                       bp->attr.disabled = 0;
+               activated = 1;
+       }
+
+       csr_xchg32(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+       kgdb_watch_activated = activated;
+}
+
+const struct kgdb_arch arch_kgdb_ops = {
+       .gdb_bpt_instr          = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */
+       .flags                  = KGDB_HW_BREAKPOINT,
+       .set_hw_breakpoint      = kgdb_set_hw_break,
+       .remove_hw_breakpoint   = kgdb_remove_hw_break,
+       .disable_hw_break       = kgdb_disable_hw_break,
+       .remove_all_hw_break    = kgdb_remove_all_hw_break,
+       .correct_hw_break       = kgdb_correct_hw_break,
+};
+
+int kgdb_arch_init(void)
+{
+       return register_die_notifier(&kgdb_notifier);
+}
+
+void kgdb_arch_late(void)
+{
+       int i, cpu;
+       struct perf_event_attr attr;
+       struct perf_event **pevent;
+
+       hw_breakpoint_init(&attr);
+
+       attr.bp_addr = (unsigned long)kgdb_arch_init;
+       attr.bp_len = HW_BREAKPOINT_LEN_4;
+       attr.bp_type = HW_BREAKPOINT_W;
+       attr.disabled = 1;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (breakinfo[i].pev)
+                       continue;
+
+               breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
+               if (IS_ERR((void * __force)breakinfo[i].pev)) {
+                       pr_err("kgdb: Could not allocate hw breakpoints.\n");
+                       breakinfo[i].pev = NULL;
+                       return;
+               }
+
+               for_each_online_cpu(cpu) {
+                       pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
+                       if (pevent[0]->destroy) {
+                               pevent[0]->destroy = NULL;
+                               release_bp_slot(*pevent);
+                       }
+               }
+       }
+}
+
+void kgdb_arch_exit(void)
+{
+       int i;
+
+       for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+               if (breakinfo[i].pev) {
+                       unregister_wide_hw_breakpoint(breakinfo[i].pev);
+                       breakinfo[i].pev = NULL;
+               }
+       }
+
+       unregister_die_notifier(&kgdb_notifier);
+}
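
get_step_address() above decodes the instruction at the current PC so that a single step can plant the breakpoint at the right target. A standalone worked example of the offset arithmetic for the 26-bit b/bl format; sign_extend64() is re-implemented here only to keep the sketch self-contained (the kernel provides it in <linux/bitops.h>):

#include <stdint.h>

/* mirrors the kernel's sign_extend64(): sign bit is at 0-based position 'index' */
static int64_t sign_extend64(uint64_t value, int index)
{
        int shift = 63 - index;

        return (int64_t)(value << shift) >> shift;
}

/* b/bl: 26-bit immediate split into a high 10-bit and a low 16-bit field,
 * scaled by 4 (instructions are 4-byte units), so the sign bit ends up at bit 27 */
uint64_t b_target(uint64_t pc, uint32_t si_h, uint32_t si_l)
{
        return pc + sign_extend64(((uint64_t)si_h << 16 | si_l) << 2, 27);
}
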
diff --git a/arch/loongarch/kernel/lbt.S b/arch/loongarch/kernel/lbt.S
new file mode 100644 (file)
index 0000000..9c75120
--- /dev/null
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ *         Huacai Chen <chenhuacai@loongson.cn>
+ *
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/regdef.h>
+
+#define SCR_REG_WIDTH 8
+
+       .macro  EX insn, reg, src, offs
+.ex\@: \insn   \reg, \src, \offs
+       _asm_extable .ex\@, .L_lbt_fault
+       .endm
+
+/*
+ * Save a thread's lbt context.
+ */
+SYM_FUNC_START(_save_lbt)
+       movscr2gr       t1, $scr0               # save scr
+       stptr.d         t1, a0, THREAD_SCR0
+       movscr2gr       t1, $scr1
+       stptr.d         t1, a0, THREAD_SCR1
+       movscr2gr       t1, $scr2
+       stptr.d         t1, a0, THREAD_SCR2
+       movscr2gr       t1, $scr3
+       stptr.d         t1, a0, THREAD_SCR3
+
+       x86mfflag       t1, 0x3f                # save eflags
+       stptr.d         t1, a0, THREAD_EFLAGS
+       jr              ra
+SYM_FUNC_END(_save_lbt)
+EXPORT_SYMBOL(_save_lbt)
+
+/*
+ * Restore a thread's lbt context.
+ */
+SYM_FUNC_START(_restore_lbt)
+       ldptr.d         t1, a0, THREAD_SCR0     # restore scr
+       movgr2scr       $scr0, t1
+       ldptr.d         t1, a0, THREAD_SCR1
+       movgr2scr       $scr1, t1
+       ldptr.d         t1, a0, THREAD_SCR2
+       movgr2scr       $scr2, t1
+       ldptr.d         t1, a0, THREAD_SCR3
+       movgr2scr       $scr3, t1
+
+       ldptr.d         t1, a0, THREAD_EFLAGS   # restore eflags
+       x86mtflag       t1, 0x3f
+       jr              ra
+SYM_FUNC_END(_restore_lbt)
+EXPORT_SYMBOL(_restore_lbt)
+
+/*
+ * Load scr/eflag with zero.
+ */
+SYM_FUNC_START(_init_lbt)
+       movgr2scr       $scr0, zero
+       movgr2scr       $scr1, zero
+       movgr2scr       $scr2, zero
+       movgr2scr       $scr3, zero
+
+       x86mtflag       zero, 0x3f
+       jr              ra
+SYM_FUNC_END(_init_lbt)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_save_lbt_context)
+       movscr2gr       t1, $scr0               # save scr
+       EX      st.d    t1, a0, (0 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr1
+       EX      st.d    t1, a0, (1 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr2
+       EX      st.d    t1, a0, (2 * SCR_REG_WIDTH)
+       movscr2gr       t1, $scr3
+       EX      st.d    t1, a0, (3 * SCR_REG_WIDTH)
+
+       x86mfflag       t1, 0x3f                # save eflags
+       EX      st.w    t1, a1, 0
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_save_lbt_context)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_restore_lbt_context)
+       EX      ld.d    t1, a0, (0 * SCR_REG_WIDTH)     # restore scr
+       movgr2scr       $scr0, t1
+       EX      ld.d    t1, a0, (1 * SCR_REG_WIDTH)
+       movgr2scr       $scr1, t1
+       EX      ld.d    t1, a0, (2 * SCR_REG_WIDTH)
+       movgr2scr       $scr2, t1
+       EX      ld.d    t1, a0, (3 * SCR_REG_WIDTH)
+       movgr2scr       $scr3, t1
+
+       EX      ld.w    t1, a1, 0                       # restore eflags
+       x86mtflag       t1, 0x3f
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_restore_lbt_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_save_ftop_context)
+       x86mftop        t1
+       st.w            t1, a0, 0
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_save_ftop_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_restore_ftop_context)
+       ld.w            t1, a0, 0
+       andi            t1, t1, 0x7
+       la.pcrel        a0, 1f
+       alsl.d          a0, t1, a0, 3
+       jr              a0
+1:
+       x86mttop        0
+       b       2f
+       x86mttop        1
+       b       2f
+       x86mttop        2
+       b       2f
+       x86mttop        3
+       b       2f
+       x86mttop        4
+       b       2f
+       x86mttop        5
+       b       2f
+       x86mttop        6
+       b       2f
+       x86mttop        7
+2:
+       li.w            a0, 0                   # success
+       jr              ra
+SYM_FUNC_END(_restore_ftop_context)
+
+.L_lbt_fault:
+       li.w            a0, -EFAULT             # failure
+       jr              ra
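
_restore_ftop_context above restores the x86 FP stack pointer by indexing into a table of x86mttop instructions: each entry is eight bytes (an x86mttop plus a branch to the common exit, the final entry falling straight through), so the masked ftop value is scaled by 8 with alsl.d before the indirect jump. A C model of the index computation, for illustration:

/* Illustrative model of the dispatch in _restore_ftop_context above */
unsigned long ftop_dispatch_target(unsigned long table_base, unsigned int ftop)
{
        return table_base + (ftop & 0x7) * 8;   /* alsl.d a0, t1, a0, 3 */
}
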
index 708665895b47d314df2f13c13db75c021f7b198a..c7d33c489e048ccc0c5ec41b164b6b70c685e6d1 100644 (file)
@@ -67,39 +67,7 @@ static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
 
 void __init pcpu_populate_pte(unsigned long addr)
 {
-       pgd_t *pgd = pgd_offset_k(addr);
-       p4d_t *p4d = p4d_offset(pgd, addr);
-       pud_t *pud;
-       pmd_t *pmd;
-
-       if (p4d_none(*p4d)) {
-               pud_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pgd_populate(&init_mm, pgd, new);
-#ifndef __PAGETABLE_PUD_FOLDED
-               pud_init(new);
-#endif
-       }
-
-       pud = pud_offset(p4d, addr);
-       if (pud_none(*pud)) {
-               pmd_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pud_populate(&init_mm, pud, new);
-#ifndef __PAGETABLE_PMD_FOLDED
-               pmd_init(new);
-#endif
-       }
-
-       pmd = pmd_offset(pud, addr);
-       if (!pmd_present(*pmd)) {
-               pte_t *new;
-
-               new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
-               pmd_populate_kernel(&init_mm, pmd, new);
-       }
+       populate_kernel_pte(addr);
 }
 
 void __init setup_per_cpu_areas(void)
@@ -470,7 +438,6 @@ void __init mem_init(void)
 {
        high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
        memblock_free_all();
-       setup_zero_pages();     /* This comes from node 0 */
 }
 
 int pcibus_to_node(struct pci_bus *bus)
index ba457e43f5be535012431151f3269b7b0b36ce0c..3cb082e0c99298c44ebbf062a0e6d42be7073dc6 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/elf.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
@@ -82,9 +83,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
        euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
        regs->csr_euen = euen;
        lose_fpu(0);
+       lose_lbt(0);
 
        clear_thread_flag(TIF_LSX_CTX_LIVE);
        clear_thread_flag(TIF_LASX_CTX_LIVE);
+       clear_thread_flag(TIF_LBT_CTX_LIVE);
        clear_used_math();
        regs->csr_era = pc;
        regs->regs[3] = sp;
@@ -121,10 +124,14 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 
        preempt_enable();
 
-       if (used_math())
-               memcpy(dst, src, sizeof(struct task_struct));
-       else
+       if (!used_math())
                memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
+       else
+               memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0));
+
+#ifdef CONFIG_CPU_HAS_LBT
+       memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt));
+#endif
 
        return 0;
 }
@@ -189,8 +196,10 @@ out:
        ptrace_hw_copy_thread(p);
        clear_tsk_thread_flag(p, TIF_USEDFPU);
        clear_tsk_thread_flag(p, TIF_USEDSIMD);
+       clear_tsk_thread_flag(p, TIF_USEDLBT);
        clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
        clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+       clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE);
 
        return 0;
 }
index f72adbf530c6465a15d5f53b1b8e311dc877280b..c114c5ef13325af024be5772b396690bbab7cbc4 100644 (file)
@@ -38,6 +38,7 @@
 #include <asm/cpu.h>
 #include <asm/cpu-info.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/loongarch.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -338,6 +339,46 @@ static int simd_set(struct task_struct *target,
 
 #endif /* CONFIG_CPU_HAS_LSX */
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int lbt_get(struct task_struct *target,
+                  const struct user_regset *regset,
+                  struct membuf to)
+{
+       int r;
+
+       r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0));
+       r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1));
+       r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2));
+       r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3));
+       r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32));
+       r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32));
+
+       return r;
+}
+
+static int lbt_set(struct task_struct *target,
+                  const struct user_regset *regset,
+                  unsigned int pos, unsigned int count,
+                  const void *kbuf, const void __user *ubuf)
+{
+       int err = 0;
+       const int eflags_start = 4 * sizeof(target->thread.lbt.scr0);
+       const int ftop_start = eflags_start + sizeof(u32);
+
+       err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.lbt.scr0,
+                                 0, 4 * sizeof(target->thread.lbt.scr0));
+       err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.lbt.eflags,
+                                 eflags_start, ftop_start);
+       err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+                                 &target->thread.fpu.ftop,
+                                 ftop_start, ftop_start + sizeof(u32));
+
+       return err;
+}
+#endif /* CONFIG_CPU_HAS_LBT */
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
 /*
@@ -802,6 +843,9 @@ enum loongarch_regset {
 #ifdef CONFIG_CPU_HAS_LASX
        REGSET_LASX,
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+       REGSET_LBT,
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        REGSET_HW_BREAK,
        REGSET_HW_WATCH,
@@ -853,6 +897,16 @@ static const struct user_regset loongarch64_regsets[] = {
                .set            = simd_set,
        },
 #endif
+#ifdef CONFIG_CPU_HAS_LBT
+       [REGSET_LBT] = {
+               .core_note_type = NT_LOONGARCH_LBT,
+               .n              = 5,
+               .size           = sizeof(u64),
+               .align          = sizeof(u64),
+               .regset_get     = lbt_get,
+               .set            = lbt_set,
+       },
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        [REGSET_HW_BREAK] = {
                .core_note_type = NT_LOONGARCH_HW_BREAK,
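
The new NT_LOONGARCH_LBT regset exposes five 64-bit slots: scr0..scr3 followed by eflags and ftop packed as two 32-bit words, exactly the order written by lbt_get(). A hypothetical debugger-side sketch that reads it with PTRACE_GETREGSET; the struct layout and the NT_LOONGARCH_LBT fallback value are assumptions mirroring the code above:

#include <stdint.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <elf.h>

#ifndef NT_LOONGARCH_LBT
#define NT_LOONGARCH_LBT 0xa04                  /* assumed fallback, see uapi <linux/elf.h> */
#endif

struct user_lbt_state {                         /* mirrors the order used by lbt_get() above */
        uint64_t scr[4];
        uint32_t eflags;
        uint32_t ftop;
};

long read_lbt(pid_t pid, struct user_lbt_state *lbt)
{
        struct iovec iov = { .iov_base = lbt, .iov_len = sizeof(*lbt) };

        return ptrace(PTRACE_GETREGSET, pid, (void *)(unsigned long)NT_LOONGARCH_LBT, &iov);
}
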
index 01f94d1e3edf6f8b26b31f30f2494a1ace76f0e8..6c3eff9af9fb1ed4cc4af8ffa9b9ea5490591ee3 100644 (file)
@@ -157,12 +157,11 @@ static inline void __init update_reloc_offset(unsigned long *addr, long random_o
        *new_addr = (unsigned long)reloc_offset;
 }
 
-void * __init relocate_kernel(void)
+unsigned long __init relocate_kernel(void)
 {
        unsigned long kernel_length;
        unsigned long random_offset = 0;
        void *location_new = _text; /* Default to original kernel start */
-       void *kernel_entry = start_kernel; /* Default to original kernel entry point */
        char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
 
        strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
@@ -190,9 +189,6 @@ void * __init relocate_kernel(void)
 
                reloc_offset += random_offset;
 
-               /* Return the new kernel's entry point */
-               kernel_entry = RELOCATED_KASLR(start_kernel);
-
                /* The current thread is now within the relocated kernel */
                __current_thread_info = RELOCATED_KASLR(__current_thread_info);
 
@@ -204,7 +200,7 @@ void * __init relocate_kernel(void)
 
        relocate_absolute(random_offset);
 
-       return kernel_entry;
+       return random_offset;
 }
 
 /*
index 9d830ab4e3025e8ab7afacfc21428dfa9665afde..7783f0a3d742c7e0cce68d93cc9252eee4e7f381 100644 (file)
@@ -626,4 +626,8 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
        paging_init();
+
+#ifdef CONFIG_KASAN
+       kasan_init();
+#endif
 }
index ceb899366c0a7b2ba2793e8631792f8aaaa03ab3..504fdfe852030f54ca66a00fdde05abd4d577907 100644 (file)
@@ -32,6 +32,7 @@
 #include <asm/cacheflush.h>
 #include <asm/cpu-features.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/ucontext.h>
 #include <asm/vdso.h>
 
@@ -44,6 +45,9 @@
 /* Make sure we will not lose FPU ownership */
 #define lock_fpu_owner()       ({ preempt_disable(); pagefault_disable(); })
 #define unlock_fpu_owner()     ({ pagefault_enable(); preempt_enable(); })
+/* Make sure we will not lose LBT ownership */
+#define lock_lbt_owner()       ({ preempt_disable(); pagefault_disable(); })
+#define unlock_lbt_owner()     ({ pagefault_enable(); preempt_enable(); })
 
 /* Assembly functions to move context to/from the FPU */
 extern asmlinkage int
@@ -59,6 +63,13 @@ _save_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 extern asmlinkage int
 _restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
 
+#ifdef CONFIG_CPU_HAS_LBT
+extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _save_ftop_context(void __user *ftop);
+extern asmlinkage int _restore_ftop_context(void __user *ftop);
+#endif
+
 struct rt_sigframe {
        struct siginfo rs_info;
        struct ucontext rs_uctx;
@@ -75,6 +86,7 @@ struct extctx_layout {
        struct _ctx_layout fpu;
        struct _ctx_layout lsx;
        struct _ctx_layout lasx;
+       struct _ctx_layout lbt;
        struct _ctx_layout end;
 };
 
@@ -215,6 +227,52 @@ static int copy_lasx_from_sigcontext(struct lasx_context __user *ctx)
        return err;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx)
+{
+       int err = 0;
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       err |= __put_user(current->thread.lbt.scr0, &regs[0]);
+       err |= __put_user(current->thread.lbt.scr1, &regs[1]);
+       err |= __put_user(current->thread.lbt.scr2, &regs[2]);
+       err |= __put_user(current->thread.lbt.scr3, &regs[3]);
+       err |= __put_user(current->thread.lbt.eflags, eflags);
+
+       return err;
+}
+
+static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx)
+{
+       int err = 0;
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       err |= __get_user(current->thread.lbt.scr0, &regs[0]);
+       err |= __get_user(current->thread.lbt.scr1, &regs[1]);
+       err |= __get_user(current->thread.lbt.scr2, &regs[2]);
+       err |= __get_user(current->thread.lbt.scr3, &regs[3]);
+       err |= __get_user(current->thread.lbt.eflags, eflags);
+
+       return err;
+}
+
+static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx)
+{
+       uint32_t  __user *ftop  = &ctx->ftop;
+
+       return __put_user(current->thread.fpu.ftop, ftop);
+}
+
+static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx)
+{
+       uint32_t  __user *ftop  = &ctx->ftop;
+
+       return __get_user(current->thread.fpu.ftop, ftop);
+}
+#endif
+
 /*
  * Wrappers for the assembly _{save,restore}_fp_context functions.
  */
@@ -272,6 +330,41 @@ static int restore_hw_lasx_context(struct lasx_context __user *ctx)
        return _restore_lasx_context(regs, fcc, fcsr);
 }
 
+/*
+ * Wrappers for the assembly _{save,restore}_lbt_context functions.
+ */
+#ifdef CONFIG_CPU_HAS_LBT
+static int save_hw_lbt_context(struct lbt_context __user *ctx)
+{
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       return _save_lbt_context(regs, eflags);
+}
+
+static int restore_hw_lbt_context(struct lbt_context __user *ctx)
+{
+       uint64_t __user *regs   = (uint64_t *)&ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+       return _restore_lbt_context(regs, eflags);
+}
+
+static int save_hw_ftop_context(struct lbt_context __user *ctx)
+{
+       uint32_t __user *ftop   = &ctx->ftop;
+
+       return _save_ftop_context(ftop);
+}
+
+static int restore_hw_ftop_context(struct lbt_context __user *ctx)
+{
+       uint32_t __user *ftop   = &ctx->ftop;
+
+       return _restore_ftop_context(ftop);
+}
+#endif
+
 static int fcsr_pending(unsigned int __user *fcsr)
 {
        int err, sig = 0;
@@ -519,6 +612,77 @@ static int protected_restore_lasx_context(struct extctx_layout *extctx)
        return err ?: sig;
 }
 
+#ifdef CONFIG_CPU_HAS_LBT
+static int protected_save_lbt_context(struct extctx_layout *extctx)
+{
+       int err = 0;
+       struct sctx_info __user *info = extctx->lbt.addr;
+       struct lbt_context __user *lbt_ctx =
+               (struct lbt_context *)get_ctx_through_ctxinfo(info);
+       uint64_t __user *regs   = (uint64_t *)&lbt_ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+       while (1) {
+               lock_lbt_owner();
+               if (is_lbt_owner())
+                       err |= save_hw_lbt_context(lbt_ctx);
+               else
+                       err |= copy_lbt_to_sigcontext(lbt_ctx);
+               if (is_fpu_owner())
+                       err |= save_hw_ftop_context(lbt_ctx);
+               else
+                       err |= copy_ftop_to_sigcontext(lbt_ctx);
+               unlock_lbt_owner();
+
+               err |= __put_user(LBT_CTX_MAGIC, &info->magic);
+               err |= __put_user(extctx->lbt.size, &info->size);
+
+               if (likely(!err))
+                       break;
+               /* Touch the LBT context and try again */
+               err = __put_user(0, &regs[0]) | __put_user(0, eflags);
+
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+
+static int protected_restore_lbt_context(struct extctx_layout *extctx)
+{
+       int err = 0, tmp __maybe_unused;
+       struct sctx_info __user *info = extctx->lbt.addr;
+       struct lbt_context __user *lbt_ctx =
+               (struct lbt_context *)get_ctx_through_ctxinfo(info);
+       uint64_t __user *regs   = (uint64_t *)&lbt_ctx->regs;
+       uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+       while (1) {
+               lock_lbt_owner();
+               if (is_lbt_owner())
+                       err |= restore_hw_lbt_context(lbt_ctx);
+               else
+                       err |= copy_lbt_from_sigcontext(lbt_ctx);
+               if (is_fpu_owner())
+                       err |= restore_hw_ftop_context(lbt_ctx);
+               else
+                       err |= copy_ftop_from_sigcontext(lbt_ctx);
+               unlock_lbt_owner();
+
+               if (likely(!err))
+                       break;
+               /* Touch the LBT context and try again */
+               err = __get_user(tmp, &regs[0]) | __get_user(tmp, eflags);
+
+               if (err)
+                       return err;
+       }
+
+       return err;
+}
+#endif
+
 static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
                            struct extctx_layout *extctx)
 {
@@ -539,6 +703,11 @@ static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
        else if (extctx->fpu.addr)
                err |= protected_save_fpu_context(extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (extctx->lbt.addr)
+               err |= protected_save_lbt_context(extctx);
+#endif
+
        /* Set the "end" magic */
        info = (struct sctx_info *)extctx->end.addr;
        err |= __put_user(0, &info->magic);
@@ -584,6 +753,13 @@ static int parse_extcontext(struct sigcontext __user *sc, struct extctx_layout *
                        extctx->lasx.addr = info;
                        break;
 
+               case LBT_CTX_MAGIC:
+                       if (size < (sizeof(struct sctx_info) +
+                                   sizeof(struct lbt_context)))
+                               goto invalid;
+                       extctx->lbt.addr = info;
+                       break;
+
                default:
                        goto invalid;
                }
@@ -636,6 +812,11 @@ static int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc
        else if (extctx.fpu.addr)
                err |= protected_restore_fpu_context(&extctx);
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (extctx.lbt.addr)
+               err |= protected_restore_lbt_context(&extctx);
+#endif
+
 bad:
        return err;
 }
@@ -700,6 +881,13 @@ static unsigned long setup_extcontext(struct extctx_layout *extctx, unsigned lon
                          sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
        }
 
+#ifdef CONFIG_CPU_HAS_LBT
+       if (cpu_has_lbt && thread_lbt_context_live()) {
+               new_sp = extframe_alloc(extctx, &extctx->lbt,
+                         sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp);
+       }
+#endif
+
        return new_sp;
 }
 
index 2463d2fea21f5f4bc9dd762fd5b1ed3c77c0fc40..92270f14db948271b00167ef0887703620b50c31 100644 (file)
@@ -18,17 +18,19 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
        struct pt_regs dummyregs;
        struct unwind_state state;
 
-       regs = &dummyregs;
+       if (!regs) {
+               regs = &dummyregs;
 
-       if (task == current) {
-               regs->regs[3] = (unsigned long)__builtin_frame_address(0);
-               regs->csr_era = (unsigned long)__builtin_return_address(0);
-       } else {
-               regs->regs[3] = thread_saved_fp(task);
-               regs->csr_era = thread_saved_ra(task);
+               if (task == current) {
+                       regs->regs[3] = (unsigned long)__builtin_frame_address(0);
+                       regs->csr_era = (unsigned long)__builtin_return_address(0);
+               } else {
+                       regs->regs[3] = thread_saved_fp(task);
+                       regs->csr_era = thread_saved_ra(task);
+               }
+               regs->regs[1] = 0;
        }
 
-       regs->regs[1] = 0;
        for (unwind_start(&state, task, regs);
             !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
                addr = unwind_get_return_address(&state);
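
Because arch_stack_walk() now honours a caller-supplied pt_regs instead of always synthesizing one, stack_trace_save_regs() can produce a trace that starts at the trapping context. A hedged sketch of such a caller (dump_fault_trace() is a hypothetical helper; the stack_trace_* API is the generic one):

#include <linux/kernel.h>
#include <linux/stacktrace.h>

static void dump_fault_trace(struct pt_regs *regs)
{
        unsigned long entries[16];
        unsigned int n;

        n = stack_trace_save_regs(regs, entries, ARRAY_SIZE(entries), 0);
        stack_trace_print(entries, n, 0);       /* trace begins at the trapping context in regs */
}
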
index 89699db45cec17ab87724ca16b2d24995b88e99b..65214774ef7c6bade648ccf4f8702cdefc7baac1 100644 (file)
@@ -36,7 +36,9 @@
 #include <asm/break.h>
 #include <asm/cpu.h>
 #include <asm/fpu.h>
+#include <asm/lbt.h>
 #include <asm/inst.h>
+#include <asm/kgdb.h>
 #include <asm/loongarch.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -702,6 +704,11 @@ asmlinkage void noinstr do_bp(struct pt_regs *regs)
         * pertain to them.
         */
        switch (bcode) {
+       case BRK_KDB:
+               if (kgdb_breakpoint_handler(regs))
+                       goto out;
+               else
+                       break;
        case BRK_KPROBE_BP:
                if (kprobe_breakpoint_handler(regs))
                        goto out;
@@ -768,6 +775,9 @@ asmlinkage void noinstr do_watch(struct pt_regs *regs)
 #ifndef CONFIG_HAVE_HW_BREAKPOINT
        pr_warn("Hardware watch point handler not implemented!\n");
 #else
+       if (kgdb_breakpoint_handler(regs))
+               goto out;
+
        if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
                int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
                unsigned long pc = instruction_pointer(regs);
@@ -966,13 +976,47 @@ out:
        irqentry_exit(regs, state);
 }
 
+static void init_restore_lbt(void)
+{
+       if (!thread_lbt_context_live()) {
+               /* First time LBT context user */
+               init_lbt();
+               set_thread_flag(TIF_LBT_CTX_LIVE);
+       } else {
+               if (!is_lbt_owner())
+                       own_lbt_inatomic(1);
+       }
+
+       BUG_ON(!is_lbt_enabled());
+}
+
 asmlinkage void noinstr do_lbt(struct pt_regs *regs)
 {
        irqentry_state_t state = irqentry_enter(regs);
 
-       local_irq_enable();
-       force_sig(SIGILL);
-       local_irq_disable();
+       /*
+        * BTD (Binary Translation Disable exception) can be triggered
+        * during FP save/restore if TM (Top Mode) is on, which may
+        * cause irq_enable during 'switch_to'. To avoid this situation
+        * (including the case where userspace uses 'MOVGR2GCSR' to turn on
+        * TM, which does not trigger the BTD exception), we check PRMD first.
+        */
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_enable();
+
+       if (!cpu_has_lbt) {
+               force_sig(SIGILL);
+               goto out;
+       }
+       BUG_ON(is_lbt_enabled());
+
+       preempt_disable();
+       init_restore_lbt();
+       preempt_enable();
+
+out:
+       if (regs->csr_prmd & CSR_PRMD_PIE)
+               local_irq_disable();
 
        irqentry_exit(regs, state);
 }
index d60d4e096cfa9e4ea1a5b3e1d2f01f1be26bc9b1..a77bf160bfc4246fa34cc666f037ace2eaf01996 100644 (file)
@@ -6,4 +6,6 @@
 lib-y  += delay.o memset.o memcpy.o memmove.o \
           clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
 
+obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
+
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
index 0790eadce166dee8b3b9f96c8a371dadc4259353..be741544e62bf63f7198d451c72c8f47eb0501d7 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-       sub.d   a0, a2, a0
-       addi.d  a0, a0, (\to) * (-8)
-       jr      ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-       addi.d  a0, a1, -\to
-       jr      ra
-.endr
-
 SYM_FUNC_START(__clear_user)
        /*
         * Some CPUs support hardware unaligned access
@@ -51,7 +38,7 @@ SYM_FUNC_START(__clear_user_generic)
 2:     move    a0, a1
        jr      ra
 
-       _asm_extable 1b, .L_fixup_handle_s0
+       _asm_extable 1b, 2b
 SYM_FUNC_END(__clear_user_generic)
 
 /*
@@ -173,33 +160,47 @@ SYM_FUNC_START(__clear_user_fast)
        jr      ra
 
        /* fixup and ex_table */
-       _asm_extable 0b, .L_fixup_handle_0
-       _asm_extable 1b, .L_fixup_handle_0
-       _asm_extable 2b, .L_fixup_handle_1
-       _asm_extable 3b, .L_fixup_handle_2
-       _asm_extable 4b, .L_fixup_handle_3
-       _asm_extable 5b, .L_fixup_handle_4
-       _asm_extable 6b, .L_fixup_handle_5
-       _asm_extable 7b, .L_fixup_handle_6
-       _asm_extable 8b, .L_fixup_handle_7
-       _asm_extable 9b, .L_fixup_handle_0
-       _asm_extable 10b, .L_fixup_handle_1
-       _asm_extable 11b, .L_fixup_handle_2
-       _asm_extable 12b, .L_fixup_handle_3
-       _asm_extable 13b, .L_fixup_handle_0
-       _asm_extable 14b, .L_fixup_handle_1
-       _asm_extable 15b, .L_fixup_handle_0
-       _asm_extable 16b, .L_fixup_handle_0
-       _asm_extable 17b, .L_fixup_handle_s0
-       _asm_extable 18b, .L_fixup_handle_s0
-       _asm_extable 19b, .L_fixup_handle_s0
-       _asm_extable 20b, .L_fixup_handle_s2
-       _asm_extable 21b, .L_fixup_handle_s0
-       _asm_extable 22b, .L_fixup_handle_s0
-       _asm_extable 23b, .L_fixup_handle_s4
-       _asm_extable 24b, .L_fixup_handle_s0
-       _asm_extable 25b, .L_fixup_handle_s4
-       _asm_extable 26b, .L_fixup_handle_s0
-       _asm_extable 27b, .L_fixup_handle_s4
-       _asm_extable 28b, .L_fixup_handle_s0
+.Llarge_fixup:
+       sub.d   a1, a2, a0
+
+.Lsmall_fixup:
+29:    st.b    zero, a0, 0
+       addi.d  a0, a0, 1
+       addi.d  a1, a1, -1
+       bgt     a1, zero, 29b
+
+.Lexit:
+       move    a0, a1
+       jr      ra
+
+       _asm_extable 0b, .Lsmall_fixup
+       _asm_extable 1b, .Llarge_fixup
+       _asm_extable 2b, .Llarge_fixup
+       _asm_extable 3b, .Llarge_fixup
+       _asm_extable 4b, .Llarge_fixup
+       _asm_extable 5b, .Llarge_fixup
+       _asm_extable 6b, .Llarge_fixup
+       _asm_extable 7b, .Llarge_fixup
+       _asm_extable 8b, .Llarge_fixup
+       _asm_extable 9b, .Llarge_fixup
+       _asm_extable 10b, .Llarge_fixup
+       _asm_extable 11b, .Llarge_fixup
+       _asm_extable 12b, .Llarge_fixup
+       _asm_extable 13b, .Llarge_fixup
+       _asm_extable 14b, .Llarge_fixup
+       _asm_extable 15b, .Llarge_fixup
+       _asm_extable 16b, .Llarge_fixup
+       _asm_extable 17b, .Lexit
+       _asm_extable 18b, .Lsmall_fixup
+       _asm_extable 19b, .Lsmall_fixup
+       _asm_extable 20b, .Lsmall_fixup
+       _asm_extable 21b, .Lsmall_fixup
+       _asm_extable 22b, .Lsmall_fixup
+       _asm_extable 23b, .Lsmall_fixup
+       _asm_extable 24b, .Lsmall_fixup
+       _asm_extable 25b, .Lsmall_fixup
+       _asm_extable 26b, .Lsmall_fixup
+       _asm_extable 27b, .Lsmall_fixup
+       _asm_extable 28b, .Lsmall_fixup
+       _asm_extable 29b, .Lexit
 SYM_FUNC_END(__clear_user_fast)
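
The rewritten fixup path above drops the per-label byte-count handlers: on a fault the code falls back to clearing the rest byte by byte, and whatever still faults is returned to the caller. A C model of that contract, for illustration only (store_byte() stands in for a single faultable st.b):

#include <stddef.h>

/* Illustrative model of the __clear_user fixup semantics shown above */
size_t clear_user_model(unsigned char *dst, size_t remaining,
                        int (*store_byte)(unsigned char *p))
{
        while (remaining) {
                if (store_byte(dst))            /* a faulting byte store ends the retry loop */
                        break;
                dst++;
                remaining--;
        }

        return remaining;                       /* bytes left uncleared, 0 on success */
}
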
index bfe3d2793d0002cf68338000e202059be9cbc44a..feec3d3628032f433dbf65e006b8a07acfbc161c 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
-       sub.d   a0, a2, a0
-       addi.d  a0, a0, (\to) * (-8)
-       jr      ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
-       addi.d  a0, a2, -\to
-       jr      ra
-.endr
-
 SYM_FUNC_START(__copy_user)
        /*
         * Some CPUs support hardware unaligned access
@@ -54,8 +41,8 @@ SYM_FUNC_START(__copy_user_generic)
 3:     move    a0, a2
        jr      ra
 
-       _asm_extable 1b, .L_fixup_handle_s0
-       _asm_extable 2b, .L_fixup_handle_s0
+       _asm_extable 1b, 3b
+       _asm_extable 2b, 3b
 SYM_FUNC_END(__copy_user_generic)
 
 /*
@@ -69,10 +56,10 @@ SYM_FUNC_START(__copy_user_fast)
        sltui   t0, a2, 9
        bnez    t0, .Lsmall
 
-       add.d   a3, a1, a2
-       add.d   a2, a0, a2
 0:     ld.d    t0, a1, 0
 1:     st.d    t0, a0, 0
+       add.d   a3, a1, a2
+       add.d   a2, a0, a2
 
        /* align up destination address */
        andi    t1, a0, 7
@@ -94,7 +81,6 @@ SYM_FUNC_START(__copy_user_fast)
 7:     ld.d    t5, a1, 40
 8:     ld.d    t6, a1, 48
 9:     ld.d    t7, a1, 56
-       addi.d  a1, a1, 64
 10:    st.d    t0, a0, 0
 11:    st.d    t1, a0, 8
 12:    st.d    t2, a0, 16
@@ -103,6 +89,7 @@ SYM_FUNC_START(__copy_user_fast)
 15:    st.d    t5, a0, 40
 16:    st.d    t6, a0, 48
 17:    st.d    t7, a0, 56
+       addi.d  a1, a1, 64
        addi.d  a0, a0, 64
        bltu    a1, a4, .Lloop64
 
@@ -114,11 +101,11 @@ SYM_FUNC_START(__copy_user_fast)
 19:    ld.d    t1, a1, 8
 20:    ld.d    t2, a1, 16
 21:    ld.d    t3, a1, 24
-       addi.d  a1, a1, 32
 22:    st.d    t0, a0, 0
 23:    st.d    t1, a0, 8
 24:    st.d    t2, a0, 16
 25:    st.d    t3, a0, 24
+       addi.d  a1, a1, 32
        addi.d  a0, a0, 32
 
 .Llt32:
@@ -126,9 +113,9 @@ SYM_FUNC_START(__copy_user_fast)
        bgeu    a1, a4, .Llt16
 26:    ld.d    t0, a1, 0
 27:    ld.d    t1, a1, 8
-       addi.d  a1, a1, 16
 28:    st.d    t0, a0, 0
 29:    st.d    t1, a0, 8
+       addi.d  a1, a1, 16
        addi.d  a0, a0, 16
 
 .Llt16:
@@ -136,6 +123,7 @@ SYM_FUNC_START(__copy_user_fast)
        bgeu    a1, a4, .Llt8
 30:    ld.d    t0, a1, 0
 31:    st.d    t0, a0, 0
+       addi.d  a1, a1, 8
        addi.d  a0, a0, 8
 
 .Llt8:
@@ -214,62 +202,79 @@ SYM_FUNC_START(__copy_user_fast)
        jr      ra
 
        /* fixup and ex_table */
-       _asm_extable 0b, .L_fixup_handle_0
-       _asm_extable 1b, .L_fixup_handle_0
-       _asm_extable 2b, .L_fixup_handle_0
-       _asm_extable 3b, .L_fixup_handle_0
-       _asm_extable 4b, .L_fixup_handle_0
-       _asm_extable 5b, .L_fixup_handle_0
-       _asm_extable 6b, .L_fixup_handle_0
-       _asm_extable 7b, .L_fixup_handle_0
-       _asm_extable 8b, .L_fixup_handle_0
-       _asm_extable 9b, .L_fixup_handle_0
-       _asm_extable 10b, .L_fixup_handle_0
-       _asm_extable 11b, .L_fixup_handle_1
-       _asm_extable 12b, .L_fixup_handle_2
-       _asm_extable 13b, .L_fixup_handle_3
-       _asm_extable 14b, .L_fixup_handle_4
-       _asm_extable 15b, .L_fixup_handle_5
-       _asm_extable 16b, .L_fixup_handle_6
-       _asm_extable 17b, .L_fixup_handle_7
-       _asm_extable 18b, .L_fixup_handle_0
-       _asm_extable 19b, .L_fixup_handle_0
-       _asm_extable 20b, .L_fixup_handle_0
-       _asm_extable 21b, .L_fixup_handle_0
-       _asm_extable 22b, .L_fixup_handle_0
-       _asm_extable 23b, .L_fixup_handle_1
-       _asm_extable 24b, .L_fixup_handle_2
-       _asm_extable 25b, .L_fixup_handle_3
-       _asm_extable 26b, .L_fixup_handle_0
-       _asm_extable 27b, .L_fixup_handle_0
-       _asm_extable 28b, .L_fixup_handle_0
-       _asm_extable 29b, .L_fixup_handle_1
-       _asm_extable 30b, .L_fixup_handle_0
-       _asm_extable 31b, .L_fixup_handle_0
-       _asm_extable 32b, .L_fixup_handle_0
-       _asm_extable 33b, .L_fixup_handle_0
-       _asm_extable 34b, .L_fixup_handle_s0
-       _asm_extable 35b, .L_fixup_handle_s0
-       _asm_extable 36b, .L_fixup_handle_s0
-       _asm_extable 37b, .L_fixup_handle_s0
-       _asm_extable 38b, .L_fixup_handle_s0
-       _asm_extable 39b, .L_fixup_handle_s0
-       _asm_extable 40b, .L_fixup_handle_s0
-       _asm_extable 41b, .L_fixup_handle_s2
-       _asm_extable 42b, .L_fixup_handle_s0
-       _asm_extable 43b, .L_fixup_handle_s0
-       _asm_extable 44b, .L_fixup_handle_s0
-       _asm_extable 45b, .L_fixup_handle_s0
-       _asm_extable 46b, .L_fixup_handle_s0
-       _asm_extable 47b, .L_fixup_handle_s4
-       _asm_extable 48b, .L_fixup_handle_s0
-       _asm_extable 49b, .L_fixup_handle_s0
-       _asm_extable 50b, .L_fixup_handle_s0
-       _asm_extable 51b, .L_fixup_handle_s4
-       _asm_extable 52b, .L_fixup_handle_s0
-       _asm_extable 53b, .L_fixup_handle_s0
-       _asm_extable 54b, .L_fixup_handle_s0
-       _asm_extable 55b, .L_fixup_handle_s4
-       _asm_extable 56b, .L_fixup_handle_s0
-       _asm_extable 57b, .L_fixup_handle_s0
+.Llarge_fixup:
+       sub.d   a2, a2, a0
+
+.Lsmall_fixup:
+58:    ld.b    t0, a1, 0
+59:    st.b    t0, a0, 0
+       addi.d  a0, a0, 1
+       addi.d  a1, a1, 1
+       addi.d  a2, a2, -1
+       bgt     a2, zero, 58b
+
+.Lexit:
+       move    a0, a2
+       jr      ra
+
+       _asm_extable 0b, .Lsmall_fixup
+       _asm_extable 1b, .Lsmall_fixup
+       _asm_extable 2b, .Llarge_fixup
+       _asm_extable 3b, .Llarge_fixup
+       _asm_extable 4b, .Llarge_fixup
+       _asm_extable 5b, .Llarge_fixup
+       _asm_extable 6b, .Llarge_fixup
+       _asm_extable 7b, .Llarge_fixup
+       _asm_extable 8b, .Llarge_fixup
+       _asm_extable 9b, .Llarge_fixup
+       _asm_extable 10b, .Llarge_fixup
+       _asm_extable 11b, .Llarge_fixup
+       _asm_extable 12b, .Llarge_fixup
+       _asm_extable 13b, .Llarge_fixup
+       _asm_extable 14b, .Llarge_fixup
+       _asm_extable 15b, .Llarge_fixup
+       _asm_extable 16b, .Llarge_fixup
+       _asm_extable 17b, .Llarge_fixup
+       _asm_extable 18b, .Llarge_fixup
+       _asm_extable 19b, .Llarge_fixup
+       _asm_extable 20b, .Llarge_fixup
+       _asm_extable 21b, .Llarge_fixup
+       _asm_extable 22b, .Llarge_fixup
+       _asm_extable 23b, .Llarge_fixup
+       _asm_extable 24b, .Llarge_fixup
+       _asm_extable 25b, .Llarge_fixup
+       _asm_extable 26b, .Llarge_fixup
+       _asm_extable 27b, .Llarge_fixup
+       _asm_extable 28b, .Llarge_fixup
+       _asm_extable 29b, .Llarge_fixup
+       _asm_extable 30b, .Llarge_fixup
+       _asm_extable 31b, .Llarge_fixup
+       _asm_extable 32b, .Llarge_fixup
+       _asm_extable 33b, .Llarge_fixup
+       _asm_extable 34b, .Lexit
+       _asm_extable 35b, .Lexit
+       _asm_extable 36b, .Lsmall_fixup
+       _asm_extable 37b, .Lsmall_fixup
+       _asm_extable 38b, .Lsmall_fixup
+       _asm_extable 39b, .Lsmall_fixup
+       _asm_extable 40b, .Lsmall_fixup
+       _asm_extable 41b, .Lsmall_fixup
+       _asm_extable 42b, .Lsmall_fixup
+       _asm_extable 43b, .Lsmall_fixup
+       _asm_extable 44b, .Lsmall_fixup
+       _asm_extable 45b, .Lsmall_fixup
+       _asm_extable 46b, .Lsmall_fixup
+       _asm_extable 47b, .Lsmall_fixup
+       _asm_extable 48b, .Lsmall_fixup
+       _asm_extable 49b, .Lsmall_fixup
+       _asm_extable 50b, .Lsmall_fixup
+       _asm_extable 51b, .Lsmall_fixup
+       _asm_extable 52b, .Lsmall_fixup
+       _asm_extable 53b, .Lsmall_fixup
+       _asm_extable 54b, .Lsmall_fixup
+       _asm_extable 55b, .Lsmall_fixup
+       _asm_extable 56b, .Lsmall_fixup
+       _asm_extable 57b, .Lsmall_fixup
+       _asm_extable 58b, .Lexit
+       _asm_extable 59b, .Lexit
 SYM_FUNC_END(__copy_user_fast)
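
The hunks above replace the per-label .L_fixup_handle_* offset handlers with two shared fall-back paths: on a fault, .Llarge_fixup recomputes the remaining length, .Lsmall_fixup retries the tail byte by byte, and .Lexit returns the count still not processed. A rough C sketch of that contract for the __copy_user_fast case follows; it is not the kernel code, and fault_at is only a stand-in for the first inaccessible source byte.

    #include <stddef.h>

    /*
     * Hedged sketch of the new fixup semantics: retry the remaining range
     * byte by byte and report how many bytes were NOT copied, which is the
     * value __copy_user hands back to its caller.
     */
    static size_t copy_tail_bytewise(unsigned char *dst, const unsigned char *src,
                                     size_t remaining, const unsigned char *fault_at)
    {
            while (remaining > 0 && src != fault_at) {
                    *dst++ = *src++;        /* labels 58:/59: in the assembly above */
                    remaining--;
            }
            return remaining;               /* bytes not copied */
    }
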
index cc30b3b6252f7fd2ecc70fca7b0ffed82d64714f..fa1148878d2b9d06ccdfcf6f2c14fd302fd14114 100644 (file)
@@ -10,6 +10,8 @@
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memcpy)
        /*
         * Some CPUs support hardware unaligned access
@@ -17,9 +19,13 @@ SYM_FUNC_START(memcpy)
        ALTERNATIVE     "b __memcpy_generic", \
                        "b __memcpy_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memcpy)
-_ASM_NOKPROBE(memcpy)
+SYM_FUNC_ALIAS(__memcpy, memcpy)
 
 EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+_ASM_NOKPROBE(memcpy)
+_ASM_NOKPROBE(__memcpy)
 
 /*
  * void *__memcpy_generic(void *dst, const void *src, size_t n)
index 7dc76d1484b6a84252a76c5f334a69d252d24465..82dae062fec85497a3c01d4fb3084df8ebb277c9 100644 (file)
 #include <asm/cpu.h>
 #include <asm/regdef.h>
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memmove)
-       blt     a0, a1, memcpy  /* dst < src, memcpy */
-       blt     a1, a0, rmemcpy /* src < dst, rmemcpy */
-       jr      ra              /* dst == src, return */
+       blt     a0, a1, __memcpy        /* dst < src, memcpy */
+       blt     a1, a0, __rmemcpy       /* src < dst, rmemcpy */
+       jr      ra                      /* dst == src, return */
 SYM_FUNC_END(memmove)
-_ASM_NOKPROBE(memmove)
+SYM_FUNC_ALIAS(__memmove, memmove)
 
 EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL(__memmove)
+
+_ASM_NOKPROBE(memmove)
+_ASM_NOKPROBE(__memmove)
 
-SYM_FUNC_START(rmemcpy)
+SYM_FUNC_START(__rmemcpy)
        /*
         * Some CPUs support hardware unaligned access
         */
        ALTERNATIVE     "b __rmemcpy_generic", \
                        "b __rmemcpy_fast", CPU_FEATURE_UAL
-SYM_FUNC_END(rmemcpy)
-_ASM_NOKPROBE(rmemcpy)
+SYM_FUNC_END(__rmemcpy)
+_ASM_NOKPROBE(__rmemcpy)
 
 /*
  * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
index 3f20f7996e8ed6e3a38ffcffb1eab7629275da17..06d3ca54cbfe7d73c6cc7a5cadeca4d558b3e6dc 100644 (file)
@@ -16,6 +16,8 @@
        bstrins.d \r0, \r0, 63, 32
 .endm
 
+.section .noinstr.text, "ax"
+
 SYM_FUNC_START(memset)
        /*
         * Some CPUs support hardware unaligned access
@@ -23,9 +25,13 @@ SYM_FUNC_START(memset)
        ALTERNATIVE     "b __memset_generic", \
                        "b __memset_fast", CPU_FEATURE_UAL
 SYM_FUNC_END(memset)
-_ASM_NOKPROBE(memset)
+SYM_FUNC_ALIAS(__memset, memset)
 
 EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
+
+_ASM_NOKPROBE(memset)
+_ASM_NOKPROBE(__memset)
 
 /*
  * void *__memset_generic(void *s, int c, size_t n)
diff --git a/arch/loongarch/lib/xor_simd.c b/arch/loongarch/lib/xor_simd.c
new file mode 100644 (file)
index 0000000..84cd24b
--- /dev/null
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include "xor_simd.h"
+
+/*
+ * Process one cache line (64 bytes) per loop iteration. This assumes that
+ * future mainstream LoongArch cores will have performance characteristics
+ * similar to the current models.
+ */
+#define LINE_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_LSX
+
+#define LD(reg, base, offset)  \
+       "vld $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)  \
+       "vst $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)     "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)    \
+       LD(0, base, 0)          \
+       LD(1, base, 16)         \
+       LD(2, base, 32)         \
+       LD(3, base, 48)
+
+#define LD_AND_XOR_LINE(base)  \
+       LD(4, base, 0)          \
+       LD(5, base, 16)         \
+       LD(6, base, 32)         \
+       LD(7, base, 48)         \
+       XOR(0, 4)               \
+       XOR(1, 5)               \
+       XOR(2, 6)               \
+       XOR(3, 7)
+
+#define ST_LINE(base)          \
+       ST(0, base, 0)          \
+       ST(1, base, 16)         \
+       ST(2, base, 32)         \
+       ST(3, base, 48)
+
+#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+
+#define LD(reg, base, offset)  \
+       "xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset)  \
+       "xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k)     "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
+
+#define LD_INOUT_LINE(base)    \
+       LD(0, base, 0)          \
+       LD(1, base, 32)
+
+#define LD_AND_XOR_LINE(base)  \
+       LD(2, base, 0)          \
+       LD(3, base, 32)         \
+       XOR(0, 2)               \
+       XOR(1, 3)
+
+#define ST_LINE(base)          \
+       ST(0, base, 0)          \
+       ST(1, base, 32)
+
+#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LASX */
diff --git a/arch/loongarch/lib/xor_simd.h b/arch/loongarch/lib/xor_simd.h
new file mode 100644 (file)
index 0000000..f50f325
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Simple interface to link xor_simd.c and xor_simd_glue.c
+ *
+ * Separating these files ensures that no SIMD instructions are run outside of
+ * the kfpu critical section.
+ */
+
+#ifndef __LOONGARCH_LIB_XOR_SIMD_H
+#define __LOONGARCH_LIB_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2);
+void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4);
+void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+                const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2);
+void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                 const unsigned long * __restrict p4);
+void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+                 const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+                 const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
diff --git a/arch/loongarch/lib/xor_simd_glue.c b/arch/loongarch/lib/xor_simd_glue.c
new file mode 100644 (file)
index 0000000..393f689
--- /dev/null
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/fpu.h>
+#include <asm/xor_simd.h>
+#include "xor_simd.h"
+
+#define MAKE_XOR_GLUE_2(flavor)                                                        \
+void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_2(bytes, p1, p2);                                      \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_2)
+
+#define MAKE_XOR_GLUE_3(flavor)                                                        \
+void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_3(bytes, p1, p2, p3);                                  \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_3)
+
+#define MAKE_XOR_GLUE_4(flavor)                                                        \
+void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3,                      \
+                     const unsigned long * __restrict p4)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_4(bytes, p1, p2, p3, p4);                              \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_4)
+
+#define MAKE_XOR_GLUE_5(flavor)                                                        \
+void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1,      \
+                     const unsigned long * __restrict p2,                      \
+                     const unsigned long * __restrict p3,                      \
+                     const unsigned long * __restrict p4,                      \
+                     const unsigned long * __restrict p5)                      \
+{                                                                              \
+       kernel_fpu_begin();                                                     \
+       __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5);                          \
+       kernel_fpu_end();                                                       \
+}                                                                              \
+EXPORT_SYMBOL_GPL(xor_##flavor##_5)
+
+#define MAKE_XOR_GLUES(flavor)         \
+       MAKE_XOR_GLUE_2(flavor);        \
+       MAKE_XOR_GLUE_3(flavor);        \
+       MAKE_XOR_GLUE_4(flavor);        \
+       MAKE_XOR_GLUE_5(flavor)
+
+#ifdef CONFIG_CPU_HAS_LSX
+MAKE_XOR_GLUES(lsx);
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+MAKE_XOR_GLUES(lasx);
+#endif
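
Each MAKE_XOR_GLUE_* invocation above generates an exported wrapper that brackets the raw SIMD routine with kernel_fpu_begin()/kernel_fpu_end(), so callers never execute vector instructions outside a kfpu section. For example, MAKE_XOR_GLUE_2(lsx) expands to roughly the following:

    void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
                   const unsigned long * __restrict p2)
    {
            kernel_fpu_begin();             /* own the FP/SIMD context */
            __xor_lsx_2(bytes, p1, p2);     /* raw LSX body from xor_simd.c */
            kernel_fpu_end();
    }
    EXPORT_SYMBOL_GPL(xor_lsx_2);
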
diff --git a/arch/loongarch/lib/xor_template.c b/arch/loongarch/lib/xor_template.c
new file mode 100644 (file)
index 0000000..0358ced
--- /dev/null
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Template for XOR operations, instantiated in xor_simd.c.
+ *
+ * Expected preprocessor definitions:
+ *
+ * - LINE_WIDTH
+ * - XOR_FUNC_NAME(nr)
+ * - LD_INOUT_LINE(buf)
+ * - LD_AND_XOR_LINE(buf)
+ * - ST_LINE(buf)
+ */
+
+void XOR_FUNC_NAME(2)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2)
+{
+       unsigned long lines = bytes / LINE_WIDTH;
+
+       do {
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2) : "memory"
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(3)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3)
+{
+       unsigned long lines = bytes / LINE_WIDTH;
+
+       do {
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       LD_AND_XOR_LINE(v3)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(4)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3,
+                     const unsigned long * __restrict v4)
+{
+       unsigned long lines = bytes / LINE_WIDTH;
+
+       do {
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       LD_AND_XOR_LINE(v3)
+                       LD_AND_XOR_LINE(v4)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
+               : "memory"
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+               v4 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(5)(unsigned long bytes,
+                     unsigned long * __restrict v1,
+                     const unsigned long * __restrict v2,
+                     const unsigned long * __restrict v3,
+                     const unsigned long * __restrict v4,
+                     const unsigned long * __restrict v5)
+{
+       unsigned long lines = bytes / LINE_WIDTH;
+
+       do {
+               __asm__ __volatile__ (
+                       LD_INOUT_LINE(v1)
+                       LD_AND_XOR_LINE(v2)
+                       LD_AND_XOR_LINE(v3)
+                       LD_AND_XOR_LINE(v4)
+                       LD_AND_XOR_LINE(v5)
+                       ST_LINE(v1)
+               : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
+                   [v5] "r"(v5) : "memory"
+               );
+
+               v1 += LINE_WIDTH / sizeof(unsigned long);
+               v2 += LINE_WIDTH / sizeof(unsigned long);
+               v3 += LINE_WIDTH / sizeof(unsigned long);
+               v4 += LINE_WIDTH / sizeof(unsigned long);
+               v5 += LINE_WIDTH / sizeof(unsigned long);
+       } while (--lines > 0);
+}
index 8ffc6383f8360e33a833be7a09de3934c89a5d91..e4d1e581dbae3aee294ad1eb81ec72516dbcd9ae 100644 (file)
@@ -7,3 +7,6 @@ obj-y                           += init.o cache.o tlb.o tlbex.o extable.o \
                                   fault.o ioremap.o maccess.o mmap.o pgtable.o page.o
 
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
+obj-$(CONFIG_KASAN)            += kasan_init.o
+
+KASAN_SANITIZE_kasan_init.o     := n
index 72685a48eaf084f61ce967bcd16fd0bb79bf9ce4..6be04d36ca0769658a2b52d25af50dd6ad7e07e0 100644 (file)
@@ -156,7 +156,6 @@ void cpu_cache_init(void)
 
        current_cpu_data.cache_leaves_present = leaf;
        current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
-       shm_align_mask = PAGE_SIZE - 1;
 }
 
 static const pgprot_t protection_map[16] = {
index da5b6d518cdb1d6c5ec550083f52f588ccf29066..e6376e3dce862ff83d5995154ab48331a59d4586 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/kprobes.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/kfence.h>
 
 #include <asm/branch.h>
 #include <asm/mmu_context.h>
@@ -30,7 +31,8 @@
 
 int show_unhandled_signals = 1;
 
-static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
+static void __kprobes no_context(struct pt_regs *regs,
+                       unsigned long write, unsigned long address)
 {
        const int field = sizeof(unsigned long) * 2;
 
@@ -38,6 +40,9 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
        if (fixup_exception(regs))
                return;
 
+       if (kfence_handle_page_fault(address, write, regs))
+               return;
+
        /*
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
@@ -51,14 +56,15 @@ static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
        die("Oops", regs);
 }
 
-static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
+static void __kprobes do_out_of_memory(struct pt_regs *regs,
+                       unsigned long write, unsigned long address)
 {
        /*
         * We ran out of memory, call the OOM killer, and return the userspace
         * (which will retry the fault, or kill us if we got oom-killed).
         */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
        pagefault_out_of_memory();
@@ -69,7 +75,7 @@ static void __kprobes do_sigbus(struct pt_regs *regs,
 {
        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
 
@@ -90,7 +96,7 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
 
        /* Kernel mode? Handle exceptions or die */
        if (!user_mode(regs)) {
-               no_context(regs, address);
+               no_context(regs, write, address);
                return;
        }
 
@@ -149,7 +155,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
         */
        if (address & __UA_LIMIT) {
                if (!user_mode(regs))
-                       no_context(regs, address);
+                       no_context(regs, write, address);
                else
                        do_sigsegv(regs, write, address, si_code);
                return;
@@ -211,7 +217,7 @@ good_area:
 
        if (fault_signal_pending(fault, regs)) {
                if (!user_mode(regs))
-                       no_context(regs, address);
+                       no_context(regs, write, address);
                return;
        }
 
@@ -232,7 +238,7 @@ good_area:
        if (unlikely(fault & VM_FAULT_ERROR)) {
                mmap_read_unlock(mm);
                if (fault & VM_FAULT_OOM) {
-                       do_out_of_memory(regs, address);
+                       do_out_of_memory(regs, write, address);
                        return;
                } else if (fault & VM_FAULT_SIGSEGV) {
                        do_sigsegv(regs, write, address, si_code);
index 3b7d8129570b83ac87455e8bb1c2f7c8a0ee35b1..f3fe8c06ba4db352ca373824254b782dc9893226 100644 (file)
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 
-/*
- * We have up to 8 empty zeroed pages so we can map one of the right colour
- * when needed.         Since page is never written to after the initialization we
- * don't have to care about aliases on other CPUs.
- */
-unsigned long empty_zero_page, zero_page_mask;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
 EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(zero_page_mask);
-
-void setup_zero_pages(void)
-{
-       unsigned int order, i;
-       struct page *page;
-
-       order = 0;
-
-       empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
-       if (!empty_zero_page)
-               panic("Oh boy, that early out of memory?");
-
-       page = virt_to_page((void *)empty_zero_page);
-       split_page(page, order);
-       for (i = 0; i < (1 << order); i++, page++)
-               mark_page_reserved(page);
-
-       zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
-}
 
 void copy_user_highpage(struct page *to, struct page *from,
        unsigned long vaddr, struct vm_area_struct *vma)
@@ -106,7 +81,6 @@ void __init mem_init(void)
        high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
        memblock_free_all();
-       setup_zero_pages();     /* Setup zeroed pages.  */
 }
 #endif /* !CONFIG_NUMA */
 
@@ -191,43 +165,42 @@ void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *al
 #endif
 #endif
 
-static pte_t *fixmap_pte(unsigned long addr)
+pte_t * __init populate_kernel_pte(unsigned long addr)
 {
-       pgd_t *pgd;
-       p4d_t *p4d;
+       pgd_t *pgd = pgd_offset_k(addr);
+       p4d_t *p4d = p4d_offset(pgd, addr);
        pud_t *pud;
        pmd_t *pmd;
 
-       pgd = pgd_offset_k(addr);
-       p4d = p4d_offset(pgd, addr);
-
-       if (pgd_none(*pgd)) {
-               pud_t *new __maybe_unused;
-
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pgd_populate(&init_mm, pgd, new);
+       if (p4d_none(*p4d)) {
+               pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pud)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               p4d_populate(&init_mm, p4d, pud);
 #ifndef __PAGETABLE_PUD_FOLDED
-               pud_init(new);
+               pud_init(pud);
 #endif
        }
 
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud)) {
-               pmd_t *new __maybe_unused;
-
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pud_populate(&init_mm, pud, new);
+               pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pmd)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               pud_populate(&init_mm, pud, pmd);
 #ifndef __PAGETABLE_PMD_FOLDED
-               pmd_init(new);
+               pmd_init(pmd);
 #endif
        }
 
        pmd = pmd_offset(pud, addr);
-       if (pmd_none(*pmd)) {
-               pte_t *new __maybe_unused;
+       if (!pmd_present(*pmd)) {
+               pte_t *pte;
 
-               new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
-               pmd_populate_kernel(&init_mm, pmd, new);
+               pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+               if (!pte)
+                       panic("%s: Failed to allocate memory\n", __func__);
+               pmd_populate_kernel(&init_mm, pmd, pte);
        }
 
        return pte_offset_kernel(pmd, addr);
@@ -241,7 +214,7 @@ void __init __set_fixmap(enum fixed_addresses idx,
 
        BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
 
-       ptep = fixmap_pte(addr);
+       ptep = populate_kernel_pte(addr);
        if (!pte_none(*ptep)) {
                pte_ERROR(*ptep);
                return;
diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c
new file mode 100644 (file)
index 0000000..da68bc1
--- /dev/null
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm-generic/sections.h>
+
+static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define __p4d_none(early, p4d) (0)
+#else
+#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \
+(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud)))
+#endif
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define __pud_none(early, pud) (0)
+#else
+#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \
+(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd)))
+#endif
+
+#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \
+(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte)))
+
+#define __pte_none(early, pte) (early ? pte_none(pte) : \
+((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page)))
+
+bool kasan_early_stage = true;
+
+/*
+ * Allocate memory for the shadow memory page tables.
+ */
+static phys_addr_t __init kasan_alloc_zeroed_page(int node)
+{
+       void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+                                       __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+       if (!p)
+               panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
+                       __func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS));
+
+       return __pa(p);
+}
+
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early)
+{
+       if (__pmd_none(early, READ_ONCE(*pmdp))) {
+               phys_addr_t pte_phys = early ?
+                               __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
+               if (!early)
+                       memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
+               pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
+       }
+
+       return pte_offset_kernel(pmdp, addr);
+}
+
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early)
+{
+       if (__pud_none(early, READ_ONCE(*pudp))) {
+               phys_addr_t pmd_phys = early ?
+                               __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node);
+               if (!early)
+                       memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd));
+               pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys));
+       }
+
+       return pmd_offset(pudp, addr);
+}
+
+static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early)
+{
+       if (__p4d_none(early, READ_ONCE(*p4dp))) {
+               phys_addr_t pud_phys = early ?
+                       __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node);
+               if (!early)
+                       memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud));
+               p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys));
+       }
+
+       return pud_offset(p4dp, addr);
+}
+
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
+                                     unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
+
+       do {
+               phys_addr_t page_phys = early ?
+                                       __pa_symbol(kasan_early_shadow_page)
+                                             : kasan_alloc_zeroed_page(node);
+               next = addr + PAGE_SIZE;
+               set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+       } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
+}
+
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
+                                     unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
+
+       do {
+               next = pmd_addr_end(addr, end);
+               kasan_pte_populate(pmdp, addr, next, node, early);
+       } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
+}
+
+static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
+                                           unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early);
+
+       do {
+               next = pud_addr_end(addr, end);
+               kasan_pmd_populate(pudp, addr, next, node, early);
+       } while (pudp++, addr = next, addr != end);
+}
+
+static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
+                                           unsigned long end, int node, bool early)
+{
+       unsigned long next;
+       p4d_t *p4dp = p4d_offset(pgdp, addr);
+
+       do {
+               next = p4d_addr_end(addr, end);
+               kasan_pud_populate(p4dp, addr, next, node, early);
+       } while (p4dp++, addr = next, addr != end);
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+                                     int node, bool early)
+{
+       unsigned long next;
+       pgd_t *pgdp;
+
+       pgdp = pgd_offset_k(addr);
+
+       do {
+               next = pgd_addr_end(addr, end);
+               kasan_p4d_populate(pgdp, addr, next, node, early);
+       } while (pgdp++, addr = next, addr != end);
+
+}
+
+/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
+static void __init kasan_map_populate(unsigned long start, unsigned long end,
+                                     int node)
+{
+       kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+}
+
+static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
+{
+       WRITE_ONCE(*pgdp, pgdval);
+}
+
+static void __init clear_pgds(unsigned long start, unsigned long end)
+{
+       /*
+        * Remove references to the kasan page tables from
+        * swapper_pg_dir. pgd_clear() can't be used here because
+        * it is a no-op on 2- and 3-level page table setups.
+        */
+       for (; start < end; start += PGDIR_SIZE)
+               kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
+}
+
+void __init kasan_init(void)
+{
+       u64 i;
+       phys_addr_t pa_start, pa_end;
+
+       /*
+        * PGD was populated as invalid_pmd_table or invalid_pud_table
+        * in pagetable_init(), depending on how many levels of page
+        * table are in use. But we had to clear the pgd entries covering
+        * the kasan shadow memory, as those pgd values are non-zero:
+        * otherwise pgd_none() would be false and the populate calls
+        * afterwards would not create any new pgd at all.
+        */
+       memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
+       csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH);
+       local_flush_tlb_all();
+
+       clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+       /* Maps everything to a single page of zeroes */
+       kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);
+
+       kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+                                       kasan_mem_to_shadow((void *)KFENCE_AREA_END));
+
+       kasan_early_stage = false;
+
+       /* Populate the linear mapping */
+       for_each_mem_range(i, &pa_start, &pa_end) {
+               void *start = (void *)phys_to_virt(pa_start);
+               void *end   = (void *)phys_to_virt(pa_end);
+
+               if (start >= end)
+                       break;
+
+               kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
+                       (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
+       }
+
+       /* Populate modules mapping */
+       kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
+               (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);
+       /*
+        * KASAN may reuse the contents of kasan_early_shadow_pte directly, so we
+        * should make sure that it maps the zero page read-only.
+        */
+       for (i = 0; i < PTRS_PER_PTE; i++)
+               set_pte(&kasan_early_shadow_pte[i],
+                       pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO));
+
+       memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+       csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH);
+       local_flush_tlb_all();
+
+       /* At this point kasan is fully initialized. Enable error messages */
+       init_task.kasan_depth = 0;
+       pr_info("KernelAddressSanitizer initialized.\n");
+}
index fbe1a4856fc42d4f255d9e9c3a9e349c3fa09e9a..a9630a81b38abbfc575ea4174af049ccd5a9a888 100644 (file)
@@ -8,12 +8,11 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 
-unsigned long shm_align_mask = PAGE_SIZE - 1;  /* Sane caches */
-EXPORT_SYMBOL(shm_align_mask);
+#define SHM_ALIGN_MASK (SHMLBA - 1)
 
-#define COLOUR_ALIGN(addr, pgoff)                              \
-       ((((addr) + shm_align_mask) & ~shm_align_mask) +        \
-        (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+#define COLOUR_ALIGN(addr, pgoff)                      \
+       ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK)  \
+        + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))
 
 enum mmap_allocation_direction {UP, DOWN};
 
@@ -40,7 +39,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
                 * cache aliasing constraints.
                 */
                if ((flags & MAP_SHARED) &&
-                   ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+                   ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK))
                        return -EINVAL;
                return addr;
        }
@@ -63,7 +62,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
        }
 
        info.length = len;
-       info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+       info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0;
        info.align_offset = pgoff << PAGE_SHIFT;
 
        if (dir == DOWN) {
index b14343e211b63f492b7aedd6ee0db9071156aa4a..71d0539e2d0b0207f901e3ef75679bc4e6bc2fa3 100644 (file)
@@ -9,6 +9,18 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 
+struct page *dmw_virt_to_page(unsigned long kaddr)
+{
+       return pfn_to_page(virt_to_pfn(kaddr));
+}
+EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+
+struct page *tlb_virt_to_page(unsigned long kaddr)
+{
+       return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+}
+EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
        pgd_t *init, *ret = NULL;
index a50308b6fc259082fb61e29e7b5b9fb99082fb83..5c97d14633282186534b2aea039add1af77b0518 100644 (file)
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 # Objects to go into the VDSO.
 
+KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
 # Include the generic Makefile to check the built vdso.
 include $(srctree)/lib/vdso/Makefile
 
index 819b6bc8ac0886d8d671a117ac9d9c694baebe95..3df5499f79369df25448c3069f00888d0cbb31f2 100644 (file)
@@ -54,11 +54,13 @@ extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
 int kasan_populate_early_shadow(const void *shadow_start,
                                const void *shadow_end);
 
+#ifndef __HAVE_ARCH_SHADOW_MAP
 static inline void *kasan_mem_to_shadow(const void *addr)
 {
        return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
                + KASAN_SHADOW_OFFSET;
 }
+#endif
 
 int kasan_add_zero_shadow(void *start, unsigned long size);
 void kasan_remove_zero_shadow(void *start, unsigned long size);
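
The new __HAVE_ARCH_SHADOW_MAP guard lets an architecture such as LoongArch supply its own kasan_mem_to_shadow() instead of the generic shift-and-offset formula above; the shadow byte encoding itself is unchanged. As an illustration only, assuming the generic 8-byte granule (KASAN_SHADOW_SCALE_SHIFT == 3), a shadow byte is interpreted roughly like this:

    #include <stdbool.h>

    /* Illustration, not kernel code: how one shadow byte describes its
     * 8-byte granule under generic KASAN. */
    static bool granule_access_ok(signed char shadow, unsigned long addr)
    {
            if (shadow == 0)
                    return true;    /* whole granule is accessible */
            if (shadow < 0)
                    return false;   /* poisoned: redzone, freed memory, ... */
            /* 1..7: only the first `shadow` bytes are valid; check a
             * one-byte access at addr */
            return (addr & 7) < (unsigned long)shadow;
    }
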
index f29aaaf2eb21e9bdab1943c50309919bec16effe..006e18decfad0796a9b8d14cc09170c074493c27 100644 (file)
@@ -108,6 +108,8 @@ extern const struct raid6_calls raid6_vpermxor1;
 extern const struct raid6_calls raid6_vpermxor2;
 extern const struct raid6_calls raid6_vpermxor4;
 extern const struct raid6_calls raid6_vpermxor8;
+extern const struct raid6_calls raid6_lsx;
+extern const struct raid6_calls raid6_lasx;
 
 struct raid6_recov_calls {
        void (*data2)(int, size_t, int, int, void **);
@@ -123,6 +125,8 @@ extern const struct raid6_recov_calls raid6_recov_avx2;
 extern const struct raid6_recov_calls raid6_recov_avx512;
 extern const struct raid6_recov_calls raid6_recov_s390xc;
 extern const struct raid6_recov_calls raid6_recov_neon;
+extern const struct raid6_recov_calls raid6_recov_lsx;
+extern const struct raid6_recov_calls raid6_recov_lasx;
 
 extern const struct raid6_calls raid6_neonx1;
 extern const struct raid6_calls raid6_neonx2;
index 45e17619422b4ef4b3c278b242806668d034b19f..035b0a4db476a1fba02620c2a422323dc5a1f560 100644 (file)
@@ -9,6 +9,7 @@ raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \
                               vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
+raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
 
 hostprogs      += mktables
 
index a22a05c9af8a3a75610fd2eca0c0b195009e2fe9..0ec534faf019bc966095f9cecf082720a834d412 100644 (file)
@@ -73,6 +73,14 @@ const struct raid6_calls * const raid6_algos[] = {
        &raid6_neonx2,
        &raid6_neonx1,
 #endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+       &raid6_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       &raid6_lsx,
+#endif
+#endif
 #if defined(__ia64__)
        &raid6_intx32,
        &raid6_intx16,
@@ -103,6 +111,14 @@ const struct raid6_recov_calls *const raid6_recov_algos[] = {
 #endif
 #if defined(CONFIG_KERNEL_MODE_NEON)
        &raid6_recov_neon,
+#endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+       &raid6_recov_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+       &raid6_recov_lsx,
+#endif
 #endif
        &raid6_recov_intx1,
        NULL
diff --git a/lib/raid6/loongarch.h b/lib/raid6/loongarch.h
new file mode 100644 (file)
index 0000000..acfc33c
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * raid6/loongarch.h
+ *
+ * Definitions common to LoongArch RAID-6 code only
+ */
+
+#ifndef _LIB_RAID6_LOONGARCH_H
+#define _LIB_RAID6_LOONGARCH_H
+
+#ifdef __KERNEL__
+
+#include <asm/cpu-features.h>
+#include <asm/fpu.h>
+
+#else /* for user-space testing */
+
+#include <sys/auxv.h>
+
+/* have to supply these defines for glibc 2.37- and musl */
+#ifndef HWCAP_LOONGARCH_LSX
+#define HWCAP_LOONGARCH_LSX    (1 << 4)
+#endif
+#ifndef HWCAP_LOONGARCH_LASX
+#define HWCAP_LOONGARCH_LASX   (1 << 5)
+#endif
+
+#define kernel_fpu_begin()
+#define kernel_fpu_end()
+
+#define cpu_has_lsx    (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX)
+#define cpu_has_lasx   (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX)
+
+#endif /* __KERNEL__ */
+
+#endif /* _LIB_RAID6_LOONGARCH_H */
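
For the user-space raid6 test build, this header falls back to hwcaps instead of the kernel's cpu-feature framework, so an LSX/LASX availability check reduces to a getauxval() query, roughly as in the sketch below (the helper name is illustrative):

    #include <sys/auxv.h>

    #ifndef HWCAP_LOONGARCH_LSX
    #define HWCAP_LOONGARCH_LSX (1 << 4)
    #endif

    /* Sketch of what cpu_has_lsx amounts to outside the kernel. */
    static int harness_has_lsx(void)
    {
            return !!(getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX);
    }
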
diff --git a/lib/raid6/loongarch_simd.c b/lib/raid6/loongarch_simd.c
new file mode 100644 (file)
index 0000000..aa5d9f9
--- /dev/null
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Based on the generic RAID-6 code (int.uc):
+ *
+ * Copyright 2002-2004 H. Peter Anvin
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * The vector algorithms are currently priority 0, which means the generic
+ * scalar algorithms are not being disabled if vector support is present.
+ * This matches the similar LoongArch RAID5 XOR code, with the main reason
+ * repeated here: it cannot be ruled out at this point that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+#define NSIZE 16
+
+static int raid6_has_lsx(void)
+{
+       return cpu_has_lsx;
+}
+
+static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = disks - 3;         /* Highest data disk */
+       p = dptr[z0+1];         /* XOR parity */
+       q = dptr[z0+2];         /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * $vr0, $vr1, $vr2, $vr3: wp
+        * $vr4, $vr5, $vr6, $vr7: wq
+        * $vr8, $vr9, $vr10, $vr11: wd
+        * $vr12, $vr13, $vr14, $vr15: w2
+        * $vr16, $vr17, $vr18, $vr19: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*4) {
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+               asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+               asm volatile("vori.b $vr4, $vr0, 0");
+               asm volatile("vori.b $vr5, $vr1, 0");
+               asm volatile("vori.b $vr6, $vr2, 0");
+               asm volatile("vori.b $vr7, $vr3, 0");
+               for (z = z0-1; z >= 0; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+                       asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("vxor.v $vr0, $vr0, $vr8");
+                       asm volatile("vxor.v $vr1, $vr1, $vr9");
+                       asm volatile("vxor.v $vr2, $vr2, $vr10");
+                       asm volatile("vxor.v $vr3, $vr3, $vr11");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("vxor.v $vr16, $vr16, $vr12");
+                       asm volatile("vxor.v $vr17, $vr17, $vr13");
+                       asm volatile("vxor.v $vr18, $vr18, $vr14");
+                       asm volatile("vxor.v $vr19, $vr19, $vr15");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr8");
+                       asm volatile("vxor.v $vr5, $vr17, $vr9");
+                       asm volatile("vxor.v $vr6, $vr18, $vr10");
+                       asm volatile("vxor.v $vr7, $vr19, $vr11");
+               }
+               /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+               asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0]));
+               asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1]));
+               asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2]));
+               asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3]));
+               /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+               asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0]));
+               asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1]));
+               asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2]));
+               asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3]));
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_lsx_xor_syndrome(int disks, int start, int stop,
+                                  size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * $vr0, $vr1, $vr2, $vr3: wp
+        * $vr4, $vr5, $vr6, $vr7: wq
+        * $vr8, $vr9, $vr10, $vr11: wd
+        * $vr12, $vr13, $vr14, $vr15: w2
+        * $vr16, $vr17, $vr18, $vr19: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*4) {
+               /* P/Q data pages */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+               asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+               asm volatile("vori.b $vr4, $vr0, 0");
+               asm volatile("vori.b $vr5, $vr1, 0");
+               asm volatile("vori.b $vr6, $vr2, 0");
+               asm volatile("vori.b $vr7, $vr3, 0");
+               for (z = z0-1; z >= start; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+                       asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("vxor.v $vr0, $vr0, $vr8");
+                       asm volatile("vxor.v $vr1, $vr1, $vr9");
+                       asm volatile("vxor.v $vr2, $vr2, $vr10");
+                       asm volatile("vxor.v $vr3, $vr3, $vr11");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("vxor.v $vr16, $vr16, $vr12");
+                       asm volatile("vxor.v $vr17, $vr17, $vr13");
+                       asm volatile("vxor.v $vr18, $vr18, $vr14");
+                       asm volatile("vxor.v $vr19, $vr19, $vr15");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr8");
+                       asm volatile("vxor.v $vr5, $vr17, $vr9");
+                       asm volatile("vxor.v $vr6, $vr18, $vr10");
+                       asm volatile("vxor.v $vr7, $vr19, $vr11");
+               }
+
+               /* P/Q left side optimization */
+               for (z = start-1; z >= 0; z--) {
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("vslti.b $vr12, $vr4, 0");
+                       asm volatile("vslti.b $vr13, $vr5, 0");
+                       asm volatile("vslti.b $vr14, $vr6, 0");
+                       asm volatile("vslti.b $vr15, $vr7, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("vslli.b $vr16, $vr4, 1");
+                       asm volatile("vslli.b $vr17, $vr5, 1");
+                       asm volatile("vslli.b $vr18, $vr6, 1");
+                       asm volatile("vslli.b $vr19, $vr7, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("vandi.b $vr12, $vr12, 0x1d");
+                       asm volatile("vandi.b $vr13, $vr13, 0x1d");
+                       asm volatile("vandi.b $vr14, $vr14, 0x1d");
+                       asm volatile("vandi.b $vr15, $vr15, 0x1d");
+                       /* wq$$ = w1$$ ^ w2$$; */
+                       asm volatile("vxor.v $vr4, $vr16, $vr12");
+                       asm volatile("vxor.v $vr5, $vr17, $vr13");
+                       asm volatile("vxor.v $vr6, $vr18, $vr14");
+                       asm volatile("vxor.v $vr7, $vr19, $vr15");
+               }
+               /*
+                * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+                * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+                */
+               asm volatile(
+                       "vld $vr20, %0\n\t"
+                       "vld $vr21, %1\n\t"
+                       "vld $vr22, %2\n\t"
+                       "vld $vr23, %3\n\t"
+                       "vld $vr24, %4\n\t"
+                       "vld $vr25, %5\n\t"
+                       "vld $vr26, %6\n\t"
+                       "vld $vr27, %7\n\t"
+                       "vxor.v $vr20, $vr20, $vr0\n\t"
+                       "vxor.v $vr21, $vr21, $vr1\n\t"
+                       "vxor.v $vr22, $vr22, $vr2\n\t"
+                       "vxor.v $vr23, $vr23, $vr3\n\t"
+                       "vxor.v $vr24, $vr24, $vr4\n\t"
+                       "vxor.v $vr25, $vr25, $vr5\n\t"
+                       "vxor.v $vr26, $vr26, $vr6\n\t"
+                       "vxor.v $vr27, $vr27, $vr7\n\t"
+                       "vst $vr20, %0\n\t"
+                       "vst $vr21, %1\n\t"
+                       "vst $vr22, %2\n\t"
+                       "vst $vr23, %3\n\t"
+                       "vst $vr24, %4\n\t"
+                       "vst $vr25, %5\n\t"
+                       "vst $vr26, %6\n\t"
+                       "vst $vr27, %7\n\t"
+                       : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+                         "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]),
+                         "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]),
+                         "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3])
+               );
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lsx = {
+       raid6_lsx_gen_syndrome,
+       raid6_lsx_xor_syndrome,
+       raid6_has_lsx,
+       "lsx",
+       .priority = 0 /* see the comment near the top of the file for reason */
+};
+
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+#define NSIZE 32
+
+static int raid6_has_lasx(void)
+{
+       return cpu_has_lasx;
+}
+
+static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = disks - 3;         /* Highest data disk */
+       p = dptr[z0+1];         /* XOR parity */
+       q = dptr[z0+2];         /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * $xr0, $xr1: wp
+        * $xr2, $xr3: wq
+        * $xr4, $xr5: wd
+        * $xr6, $xr7: w2
+        * $xr8, $xr9: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*2) {
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("xvori.b $xr2, $xr0, 0");
+               asm volatile("xvori.b $xr3, $xr1, 0");
+               for (z = z0-1; z >= 0; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("xvxor.v $xr0, $xr0, $xr4");
+                       asm volatile("xvxor.v $xr1, $xr1, $xr5");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("xvxor.v $xr8, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr9, $xr9, $xr7");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr4");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr5");
+               }
+               /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+               asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0]));
+               asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1]));
+               /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+               asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0]));
+               asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1]));
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_lasx_xor_syndrome(int disks, int start, int stop,
+                                   size_t bytes, void **ptrs)
+{
+       u8 **dptr = (u8 **)ptrs;
+       u8 *p, *q;
+       int d, z, z0;
+
+       z0 = stop;              /* P/Q right side optimization */
+       p = dptr[disks-2];      /* XOR parity */
+       q = dptr[disks-1];      /* RS syndrome */
+
+       kernel_fpu_begin();
+
+       /*
+        * $xr0, $xr1: wp
+        * $xr2, $xr3: wq
+        * $xr4, $xr5: wd
+        * $xr6, $xr7: w2
+        * $xr8, $xr9: w1
+        */
+       for (d = 0; d < bytes; d += NSIZE*2) {
+               /* P/Q data pages */
+               /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+               asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+               asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+               asm volatile("xvori.b $xr2, $xr0, 0");
+               asm volatile("xvori.b $xr3, $xr1, 0");
+               for (z = z0-1; z >= start; z--) {
+                       /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+                       asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+                       asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+                       /* wp$$ ^= wd$$; */
+                       asm volatile("xvxor.v $xr0, $xr0, $xr4");
+                       asm volatile("xvxor.v $xr1, $xr1, $xr5");
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* w1$$ ^= w2$$; */
+                       asm volatile("xvxor.v $xr8, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr9, $xr9, $xr7");
+                       /* wq$$ = w1$$ ^ wd$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr4");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr5");
+               }
+
+               /* P/Q left side optimization */
+               for (z = start-1; z >= 0; z--) {
+                       /* w2$$ = MASK(wq$$); */
+                       asm volatile("xvslti.b $xr6, $xr2, 0");
+                       asm volatile("xvslti.b $xr7, $xr3, 0");
+                       /* w1$$ = SHLBYTE(wq$$); */
+                       asm volatile("xvslli.b $xr8, $xr2, 1");
+                       asm volatile("xvslli.b $xr9, $xr3, 1");
+                       /* w2$$ &= NBYTES(0x1d); */
+                       asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+                       asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+                       /* wq$$ = w1$$ ^ w2$$; */
+                       asm volatile("xvxor.v $xr2, $xr8, $xr6");
+                       asm volatile("xvxor.v $xr3, $xr9, $xr7");
+               }
+               /*
+                * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+                * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+                */
+               asm volatile(
+                       "xvld $xr10, %0\n\t"
+                       "xvld $xr11, %1\n\t"
+                       "xvld $xr12, %2\n\t"
+                       "xvld $xr13, %3\n\t"
+                       "xvxor.v $xr10, $xr10, $xr0\n\t"
+                       "xvxor.v $xr11, $xr11, $xr1\n\t"
+                       "xvxor.v $xr12, $xr12, $xr2\n\t"
+                       "xvxor.v $xr13, $xr13, $xr3\n\t"
+                       "xvst $xr10, %0\n\t"
+                       "xvst $xr11, %1\n\t"
+                       "xvst $xr12, %2\n\t"
+                       "xvst $xr13, %3\n\t"
+                       : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+                         "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1])
+               );
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lasx = {
+       raid6_lasx_gen_syndrome,
+       raid6_lasx_xor_syndrome,
+       raid6_has_lasx,
+       "lasx",
+       .priority = 0 /* see the comment near the top of the file for reason */
+};
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LASX */
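The LSX/LASX loops above vectorize the standard RAID-6 recurrence: P accumulates a plain XOR of the data blocks, while Q is built Horner-style, multiplying the running value by 2 in GF(2^8) (polynomial 0x11d) before XOR-ing in the next block; the vslti.b/vandi.b/vslli.b triple implements exactly that multiply-by-2. A scalar C sketch of the same per-byte recurrence, for reference only (not part of the patch, helper names are illustrative):

/* Multiply by 2 in GF(2^8) with the RAID-6 polynomial 0x11d. */
static inline u8 gf_mul2(u8 x)
{
        return (u8)((x << 1) ^ ((x & 0x80) ? 0x1d : 0));
}

/* Scalar reference for gen_syndrome(): one byte per iteration. */
static void gen_syndrome_ref(int disks, size_t bytes, void **ptrs)
{
        u8 **d = (u8 **)ptrs;
        int z0 = disks - 3;             /* highest data disk */
        u8 *p = d[z0 + 1];              /* XOR parity */
        u8 *q = d[z0 + 2];              /* RS syndrome */
        size_t i;
        int z;

        for (i = 0; i < bytes; i++) {
                u8 wp = d[z0][i], wq = d[z0][i];

                for (z = z0 - 1; z >= 0; z--) {
                        wp ^= d[z][i];                  /* wp ^= wd */
                        wq = gf_mul2(wq) ^ d[z][i];     /* wq = 2*wq ^ wd */
                }
                p[i] = wp;
                q[i] = wq;
        }
}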
diff --git a/lib/raid6/recov_loongarch_simd.c b/lib/raid6/recov_loongarch_simd.c
new file mode 100644 (file)
index 0000000..94aeac8
--- /dev/null
@@ -0,0 +1,513 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Originally based on recov_avx2.c and recov_ssse3.c:
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * Unlike the syndrome calculation algorithms, the recovery algorithms are not
+ * selected at boot time by benchmarking, so we have to specify the priorities
+ * up front and hope that future cores all have decent vector support (i.e.
+ * LASX is never slower than LSX, and LSX never slower than scalar code).
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+static int raid6_has_lsx(void)
+{
+       return cpu_has_lsx;
+}
+
+static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
+                                 int failb, void **ptrs)
+{
+       u8 *p, *q, *dp, *dq;
+       const u8 *pbmul;        /* P multiplier table for B data */
+       const u8 *qmul;         /* Q multiplier table (for both) */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        */
+       dp = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 2] = dp;
+       dq = (u8 *)ptrs[failb];
+       ptrs[failb] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dp;
+       ptrs[failb] = dq;
+       ptrs[disks - 2] = p;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+       kernel_fpu_begin();
+
+       /*
+        * vr20, vr21: qmul
+        * vr22, vr23: pbmul
+        */
+       asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+       asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+
+       while (bytes) {
+               /* vr4 - vr7: Q */
+               asm volatile("vld $vr4, %0" : : "m" (q[0]));
+               asm volatile("vld $vr5, %0" : : "m" (q[16]));
+               asm volatile("vld $vr6, %0" : : "m" (q[32]));
+               asm volatile("vld $vr7, %0" : : "m" (q[48]));
+               /* vr4 - vr7: Q + Qxy */
+               asm volatile("vld $vr8, %0" : : "m" (dq[0]));
+               asm volatile("vld $vr9, %0" : : "m" (dq[16]));
+               asm volatile("vld $vr10, %0" : : "m" (dq[32]));
+               asm volatile("vld $vr11, %0" : : "m" (dq[48]));
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+               /* vr0 - vr3: P */
+               asm volatile("vld $vr0, %0" : : "m" (p[0]));
+               asm volatile("vld $vr1, %0" : : "m" (p[16]));
+               asm volatile("vld $vr2, %0" : : "m" (p[32]));
+               asm volatile("vld $vr3, %0" : : "m" (p[48]));
+               /* vr0 - vr3: P + Pxy */
+               asm volatile("vld $vr8, %0" : : "m" (dp[0]));
+               asm volatile("vld $vr9, %0" : : "m" (dp[16]));
+               asm volatile("vld $vr10, %0" : : "m" (dp[32]));
+               asm volatile("vld $vr11, %0" : : "m" (dp[48]));
+               asm volatile("vxor.v $vr0, $vr0, $vr8");
+               asm volatile("vxor.v $vr1, $vr1, $vr9");
+               asm volatile("vxor.v $vr2, $vr2, $vr10");
+               asm volatile("vxor.v $vr3, $vr3, $vr11");
+
+               /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
+               asm volatile("vsrli.b $vr8, $vr4, 4");
+               asm volatile("vsrli.b $vr9, $vr5, 4");
+               asm volatile("vsrli.b $vr10, $vr6, 4");
+               asm volatile("vsrli.b $vr11, $vr7, 4");
+               /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
+               asm volatile("vandi.b $vr4, $vr4, 0x0f");
+               asm volatile("vandi.b $vr5, $vr5, 0x0f");
+               asm volatile("vandi.b $vr6, $vr6, 0x0f");
+               asm volatile("vandi.b $vr7, $vr7, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
+               asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
+               asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
+               asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
+               /* lookup from qmul[16] */
+               asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
+               asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
+               asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
+               asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
+               /* vr16 - vr19: B(Q + Qxy) */
+               asm volatile("vxor.v $vr16, $vr8, $vr4");
+               asm volatile("vxor.v $vr17, $vr9, $vr5");
+               asm volatile("vxor.v $vr18, $vr10, $vr6");
+               asm volatile("vxor.v $vr19, $vr11, $vr7");
+
+               /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
+               asm volatile("vsrli.b $vr4, $vr0, 4");
+               asm volatile("vsrli.b $vr5, $vr1, 4");
+               asm volatile("vsrli.b $vr6, $vr2, 4");
+               asm volatile("vsrli.b $vr7, $vr3, 4");
+               /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
+               asm volatile("vandi.b $vr12, $vr0, 0x0f");
+               asm volatile("vandi.b $vr13, $vr1, 0x0f");
+               asm volatile("vandi.b $vr14, $vr2, 0x0f");
+               asm volatile("vandi.b $vr15, $vr3, 0x0f");
+               /* lookup from pbmul[0] */
+               asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
+               asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
+               asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
+               asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
+               /* lookup from pbmul[16] */
+               asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
+               asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
+               asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
+               asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
+               /* vr4 - vr7: A(P + Pxy) */
+               asm volatile("vxor.v $vr4, $vr4, $vr12");
+               asm volatile("vxor.v $vr5, $vr5, $vr13");
+               asm volatile("vxor.v $vr6, $vr6, $vr14");
+               asm volatile("vxor.v $vr7, $vr7, $vr15");
+
+               /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
+               asm volatile("vxor.v $vr4, $vr4, $vr16");
+               asm volatile("vxor.v $vr5, $vr5, $vr17");
+               asm volatile("vxor.v $vr6, $vr6, $vr18");
+               asm volatile("vxor.v $vr7, $vr7, $vr19");
+               asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+               asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+               asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+               asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+               /* vr0 - vr3: P + Pxy + Dx = Dy */
+               asm volatile("vxor.v $vr0, $vr0, $vr4");
+               asm volatile("vxor.v $vr1, $vr1, $vr5");
+               asm volatile("vxor.v $vr2, $vr2, $vr6");
+               asm volatile("vxor.v $vr3, $vr3, $vr7");
+               asm volatile("vst $vr0, %0" : "=m" (dp[0]));
+               asm volatile("vst $vr1, %0" : "=m" (dp[16]));
+               asm volatile("vst $vr2, %0" : "=m" (dp[32]));
+               asm volatile("vst $vr3, %0" : "=m" (dp[48]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dp += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
+                                 void **ptrs)
+{
+       u8 *p, *q, *dq;
+       const u8 *qmul;         /* Q multiplier table */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        */
+       dq = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dq;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+       kernel_fpu_begin();
+
+       /* vr22, vr23: qmul */
+       asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+
+       while (bytes) {
+               /* vr0 - vr3: P + Dx */
+               asm volatile("vld $vr0, %0" : : "m" (p[0]));
+               asm volatile("vld $vr1, %0" : : "m" (p[16]));
+               asm volatile("vld $vr2, %0" : : "m" (p[32]));
+               asm volatile("vld $vr3, %0" : : "m" (p[48]));
+               /* vr4 - vr7: Qx */
+               asm volatile("vld $vr4, %0" : : "m" (dq[0]));
+               asm volatile("vld $vr5, %0" : : "m" (dq[16]));
+               asm volatile("vld $vr6, %0" : : "m" (dq[32]));
+               asm volatile("vld $vr7, %0" : : "m" (dq[48]));
+               /* vr4 - vr7: Q + Qx */
+               asm volatile("vld $vr8, %0" : : "m" (q[0]));
+               asm volatile("vld $vr9, %0" : : "m" (q[16]));
+               asm volatile("vld $vr10, %0" : : "m" (q[32]));
+               asm volatile("vld $vr11, %0" : : "m" (q[48]));
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+
+               /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
+               asm volatile("vsrli.b $vr8, $vr4, 4");
+               asm volatile("vsrli.b $vr9, $vr5, 4");
+               asm volatile("vsrli.b $vr10, $vr6, 4");
+               asm volatile("vsrli.b $vr11, $vr7, 4");
+               /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
+               asm volatile("vandi.b $vr4, $vr4, 0x0f");
+               asm volatile("vandi.b $vr5, $vr5, 0x0f");
+               asm volatile("vandi.b $vr6, $vr6, 0x0f");
+               asm volatile("vandi.b $vr7, $vr7, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
+               asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
+               asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
+               asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
+               /* lookup from qmul[16] */
+               asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
+               asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
+               asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
+               asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
+               /* vr4 - vr7: qmul(Q + Qx) = Dx */
+               asm volatile("vxor.v $vr4, $vr4, $vr8");
+               asm volatile("vxor.v $vr5, $vr5, $vr9");
+               asm volatile("vxor.v $vr6, $vr6, $vr10");
+               asm volatile("vxor.v $vr7, $vr7, $vr11");
+               asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+               asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+               asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+               asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+               /* vr0 - vr3: P + Dx + Dx = P */
+               asm volatile("vxor.v $vr0, $vr0, $vr4");
+               asm volatile("vxor.v $vr1, $vr1, $vr5");
+               asm volatile("vxor.v $vr2, $vr2, $vr6");
+               asm volatile("vxor.v $vr3, $vr3, $vr7");
+               asm volatile("vst $vr0, %0" : "=m" (p[0]));
+               asm volatile("vst $vr1, %0" : "=m" (p[16]));
+               asm volatile("vst $vr2, %0" : "=m" (p[32]));
+               asm volatile("vst $vr3, %0" : "=m" (p[48]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lsx = {
+       .data2 = raid6_2data_recov_lsx,
+       .datap = raid6_datap_recov_lsx,
+       .valid = raid6_has_lsx,
+       .name = "lsx",
+       .priority = 1,
+};
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static int raid6_has_lasx(void)
+{
+       return cpu_has_lasx;
+}
+
+static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
+                                  int failb, void **ptrs)
+{
+       u8 *p, *q, *dp, *dq;
+       const u8 *pbmul;        /* P multiplier table for B data */
+       const u8 *qmul;         /* Q multiplier table (for both) */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data pages
+        * Use the dead data pages as temporary storage for
+        * delta p and delta q
+        */
+       dp = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 2] = dp;
+       dq = (u8 *)ptrs[failb];
+       ptrs[failb] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dp;
+       ptrs[failb] = dq;
+       ptrs[disks - 2] = p;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+       kernel_fpu_begin();
+
+       /*
+        * xr20, xr21: qmul
+        * xr22, xr23: pbmul
+        */
+       asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+       asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+       asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+       asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+       asm volatile("xvreplve0.q $xr20, $xr20");
+       asm volatile("xvreplve0.q $xr21, $xr21");
+       asm volatile("xvreplve0.q $xr22, $xr22");
+       asm volatile("xvreplve0.q $xr23, $xr23");
+
+       while (bytes) {
+               /* xr0, xr1: Q */
+               asm volatile("xvld $xr0, %0" : : "m" (q[0]));
+               asm volatile("xvld $xr1, %0" : : "m" (q[32]));
+               /* xr0, xr1: Q + Qxy */
+               asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
+               asm volatile("xvxor.v $xr0, $xr0, $xr4");
+               asm volatile("xvxor.v $xr1, $xr1, $xr5");
+               /* xr2, xr3: P */
+               asm volatile("xvld $xr2, %0" : : "m" (p[0]));
+               asm volatile("xvld $xr3, %0" : : "m" (p[32]));
+               /* xr2, xr3: P + Pxy */
+               asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
+               asm volatile("xvsrli.b $xr4, $xr0, 4");
+               asm volatile("xvsrli.b $xr5, $xr1, 4");
+               /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
+               asm volatile("xvandi.b $xr0, $xr0, 0x0f");
+               asm volatile("xvandi.b $xr1, $xr1, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
+               asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
+               /* lookup from qmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
+               /* xr6, xr7: B(Q + Qxy) */
+               asm volatile("xvxor.v $xr6, $xr4, $xr0");
+               asm volatile("xvxor.v $xr7, $xr5, $xr1");
+
+               /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
+               asm volatile("xvsrli.b $xr4, $xr2, 4");
+               asm volatile("xvsrli.b $xr5, $xr3, 4");
+               /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
+               asm volatile("xvandi.b $xr0, $xr2, 0x0f");
+               asm volatile("xvandi.b $xr1, $xr3, 0x0f");
+               /* lookup from pbmul[0] */
+               asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
+               asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
+               /* lookup from pbmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+               /* xr0, xr1: A(P + Pxy) */
+               asm volatile("xvxor.v $xr0, $xr0, $xr4");
+               asm volatile("xvxor.v $xr1, $xr1, $xr5");
+
+               /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
+               asm volatile("xvxor.v $xr0, $xr0, $xr6");
+               asm volatile("xvxor.v $xr1, $xr1, $xr7");
+
+               /* xr2, xr3: P + Pxy + Dx = Dy */
+               asm volatile("xvxor.v $xr2, $xr2, $xr0");
+               asm volatile("xvxor.v $xr3, $xr3, $xr1");
+
+               asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
+               asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
+               asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
+               asm volatile("xvst $xr3, %0" : "=m" (dp[32]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dp += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
+                                  void **ptrs)
+{
+       u8 *p, *q, *dq;
+       const u8 *qmul;         /* Q multiplier table */
+
+       p = (u8 *)ptrs[disks - 2];
+       q = (u8 *)ptrs[disks - 1];
+
+       /*
+        * Compute syndrome with zero for the missing data page
+        * Use the dead data page as temporary storage for delta q
+        */
+       dq = (u8 *)ptrs[faila];
+       ptrs[faila] = (void *)raid6_empty_zero_page;
+       ptrs[disks - 1] = dq;
+
+       raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+       /* Restore pointer table */
+       ptrs[faila] = dq;
+       ptrs[disks - 1] = q;
+
+       /* Now, pick the proper data tables */
+       qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+       kernel_fpu_begin();
+
+       /* xr22, xr23: qmul */
+       asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+       asm volatile("xvreplve0.q $xr22, $xr22");
+       asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+       asm volatile("xvreplve0.q $xr23, $xr23");
+
+       while (bytes) {
+               /* xr0, xr1: P + Dx */
+               asm volatile("xvld $xr0, %0" : : "m" (p[0]));
+               asm volatile("xvld $xr1, %0" : : "m" (p[32]));
+               /* xr2, xr3: Qx */
+               asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
+               asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
+               /* xr2, xr3: Q + Qx */
+               asm volatile("xvld $xr4, %0" : : "m" (q[0]));
+               asm volatile("xvld $xr5, %0" : : "m" (q[32]));
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
+               asm volatile("xvsrli.b $xr4, $xr2, 4");
+               asm volatile("xvsrli.b $xr5, $xr3, 4");
+               /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
+               asm volatile("xvandi.b $xr2, $xr2, 0x0f");
+               asm volatile("xvandi.b $xr3, $xr3, 0x0f");
+               /* lookup from qmul[0] */
+               asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
+               asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
+               /* lookup from qmul[16] */
+               asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+               asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+               /* xr2, xr3: qmul(Q + Qx) = Dx */
+               asm volatile("xvxor.v $xr2, $xr2, $xr4");
+               asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+               /* xr0, xr1: P + Dx + Dx = P */
+               asm volatile("xvxor.v $xr0, $xr0, $xr2");
+               asm volatile("xvxor.v $xr1, $xr1, $xr3");
+
+               asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
+               asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
+               asm volatile("xvst $xr0, %0" : "=m" (p[0]));
+               asm volatile("xvst $xr1, %0" : "=m" (p[32]));
+
+               bytes -= 64;
+               p += 64;
+               q += 64;
+               dq += 64;
+       }
+
+       kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lasx = {
+       .data2 = raid6_2data_recov_lasx,
+       .datap = raid6_datap_recov_lasx,
+       .valid = raid6_has_lasx,
+       .name = "lasx",
+       .priority = 2,
+};
+#endif /* CONFIG_CPU_HAS_LASX */
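The vshuf.b/xvshuf.b sequences in the recovery paths implement a GF(2^8) multiply by a fixed constant via two 16-entry nibble tables: each raid6_vgfmul[] entry is 32 bytes, with tbl[0..15] holding the products of the low nibble and tbl[16..31] the products of the high nibble, and the two lookups are XOR-ed together. With that primitive, two-failure recovery is Dx = A*(P ^ Pxy) ^ B*(Q ^ Qxy) and Dy = (P ^ Pxy) ^ Dx, with A taken from the pbmul table and B from qmul. A scalar sketch of the per-byte core (illustrative only, not part of the patch):

/* GF(2^8) multiply by the constant baked into a 32-byte raid6_vgfmul table. */
static inline u8 gf_mul_tbl(const u8 *tbl, u8 x)
{
        return tbl[x & 0x0f] ^ tbl[16 + (x >> 4)];
}

/* Per-byte core of raid6_2data_recov_*(): rebuild the two lost data bytes. */
static inline void recov_2data_byte(u8 p, u8 q, u8 pxy, u8 qxy,
                                    const u8 *pbmul, const u8 *qmul,
                                    u8 *dx, u8 *dy)
{
        u8 px = p ^ pxy;        /* P + Pxy */
        u8 qx = q ^ qxy;        /* Q + Qxy */

        *dx = gf_mul_tbl(pbmul, px) ^ gf_mul_tbl(qmul, qx);    /* A(P+Pxy) ^ B(Q+Qxy) */
        *dy = px ^ *dx;                                         /* (P+Pxy) ^ Dx */
}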
index 1f693ea3b980cef7f229ba19c5a794d4e24377b8..2abe0076a636c3816678d13df2b0c7d036da154a 100644 (file)
@@ -41,6 +41,16 @@ ifeq ($(findstring ppc,$(ARCH)),ppc)
                          gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
 endif
 
+ifeq ($(ARCH),loongarch64)
+        CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1
+        CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' |         \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1)
+        CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' |        \
+                    gcc -c -x assembler - >/dev/null 2>&1 &&    \
+                    rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1)
+endif
+
 ifeq ($(IS_X86),yes)
         OBJS   += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
         CFLAGS += -DCONFIG_X86
@@ -54,6 +64,8 @@ else ifeq ($(HAS_ALTIVEC),yes)
         CFLAGS += -DCONFIG_ALTIVEC
         OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
                 vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
+else ifeq ($(ARCH),loongarch64)
+        OBJS += loongarch_simd.o recov_loongarch_simd.o
 endif
 
 .c.o:
index dcfec277e8394be8312058c3cab2dcdacd5b105b..89895f38f722423c02744f359799db4bb7f12dc2 100644 (file)
@@ -139,6 +139,10 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr,
        return 0;
 }
 
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
 static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
                                unsigned long end)
 {
@@ -166,8 +170,9 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
                                if (!p)
                                        return -ENOMEM;
                        } else {
-                               pud_populate(&init_mm, pud,
-                                       early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+                               p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+                               pmd_init(p);
+                               pud_populate(&init_mm, pud, p);
                        }
                }
                zero_pmd_populate(pud, addr, next);
@@ -176,6 +181,10 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
        return 0;
 }
 
+void __weak __meminit pud_init(void *addr)
+{
+}
+
 static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
                                unsigned long end)
 {
@@ -207,8 +216,9 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
                                if (!p)
                                        return -ENOMEM;
                        } else {
-                               p4d_populate(&init_mm, p4d,
-                                       early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+                               p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+                               pud_init(p);
+                               p4d_populate(&init_mm, p4d, p);
                        }
                }
                zero_pud_populate(p4d, addr, next);
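The new __weak pmd_init()/pud_init() hooks let architectures whose empty page-table entries are non-zero (they point at a dummy invalid table rather than being zero-filled) fix up the pages that the early zero_*_populate() path allocates here. A sketch of what an arch-side override could look like, assuming empty PMD entries must point at invalid_pte_table (illustrative, not the exact LoongArch implementation):

/* Illustrative arch override; assumes invalid_pte_table is the arch's dummy PTE page. */
void __meminit pmd_init(void *addr)
{
        unsigned long *entry = addr;
        unsigned long pagetable = (unsigned long)invalid_pte_table;
        int i;

        for (i = 0; i < PTRS_PER_PMD; i++)
                entry[i] = pagetable;
}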
index 2e973b36fe0725339c04eda6f10446b8b28244ca..f70e3d7a602e1e816abe7357793fcb16fdfa3f91 100644 (file)
@@ -291,16 +291,22 @@ struct kasan_stack_ring {
 
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
+#ifndef __HAVE_ARCH_SHADOW_MAP
 static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
 {
        return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
                << KASAN_SHADOW_SCALE_SHIFT);
 }
+#endif
 
 static __always_inline bool addr_has_metadata(const void *addr)
 {
+#ifdef __HAVE_ARCH_SHADOW_MAP
+       return (kasan_mem_to_shadow((void *)addr) != NULL);
+#else
        return (kasan_reset_tag(addr) >=
                kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
+#endif
 }
 
 /**
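When an architecture defines __HAVE_ARCH_SHADOW_MAP, it supplies its own kasan_mem_to_shadow()/kasan_shadow_to_mem(), and the contract addr_has_metadata() now relies on is simply "no shadow mapping => NULL". A hypothetical arch-side shape (the arch_* helpers are invented for illustration; this is not the actual LoongArch code):

/* Hypothetical <asm/kasan.h> fragment; arch_addr_has_shadow() and
 * arch_shadow_offset() are made-up placeholders for the arch's real
 * per-region handling. */
#define __HAVE_ARCH_SHADOW_MAP

static inline void *kasan_mem_to_shadow(const void *addr)
{
        unsigned long maddr = (unsigned long)addr;

        if (!arch_addr_has_shadow(maddr))
                return NULL;    /* addr_has_metadata() sees NULL and returns false */

        return (void *)(arch_shadow_offset(maddr) +
                        (maddr >> KASAN_SHADOW_SCALE_SHIFT));
}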
index 96fd0411f5c5866d69913f6e6e8a87285e830aad..3872528d096380b0c412897e7cc7b56b4cb59eb9 100644 (file)
@@ -574,13 +574,14 @@ static void rcu_guarded_free(struct rcu_head *h)
  */
 static unsigned long kfence_init_pool(void)
 {
-       unsigned long addr = (unsigned long)__kfence_pool;
+       unsigned long addr;
        struct page *pages;
        int i;
 
        if (!arch_kfence_init_pool())
-               return addr;
+               return (unsigned long)__kfence_pool;
 
+       addr = (unsigned long)__kfence_pool;
        pages = virt_to_page(__kfence_pool);
 
        /*