lib/crc32: improve support for arch-specific overrides

author Eric Biggers <ebiggers@google.com>

Mon, 2 Dec 2024 01:08:27 +0000 (17:08 -0800)

committer Eric Biggers <ebiggers@google.com>

Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
author Eric Biggers <ebiggers@google.com>
Mon, 2 Dec 2024 01:08:27 +0000 (17:08 -0800)
committer Eric Biggers <ebiggers@google.com>
Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig

index 100570a048c5e8892c0112704f9ca74c4fc55b27..71f6310c8240eedb18ebbb66c5a94127937eb92d 100644 (file)
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -21,6 +21,7 @@ config ARM64
         select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
         select ARCH_HAS_CACHE_LINE_SIZE
         select ARCH_HAS_CC_PLATFORM
+       select ARCH_HAS_CRC32
         select ARCH_HAS_CURRENT_STACK_POINTER
         select ARCH_HAS_DEBUG_VIRTUAL
         select ARCH_HAS_DEBUG_VM_PGTABLE
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile

index 8e882f479d98143e5c70423d00f0cd255b2b793d..5fbcf0d5666550503a0ab6a84632b741dd827cde 100644 (file)
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -13,7 +13,8 @@ endif
  
  lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
  
-obj-$(CONFIG_CRC32) += crc32.o crc32-glue.o
+obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o
+crc32-arm64-y := crc32.o crc32-glue.o
  
  obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
  
diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c

index ad015223d15df151c318b8aedfead9543acb8d19..d7f6e1cbf0d233c589a46d92d3cf44f95ae33282 100644 (file)
--- a/arch/arm64/lib/crc32-glue.c
+++ b/arch/arm64/lib/crc32-glue.c
@@ -2,6 +2,7 @@
  
  #include <linux/crc32.h>
  #include <linux/linkage.h>
+#include <linux/module.h>
  
  #include <asm/alternative.h>
  #include <asm/cpufeature.h>
@@ -21,7 +22,7 @@ asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
  asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
  asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len);
  
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
  {
         if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
                 return crc32_le_base(crc, p, len);
@@ -40,8 +41,9 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
  
         return crc32_le_arm64(crc, p, len);
  }
+EXPORT_SYMBOL(crc32_le_arch);
  
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
  {
         if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
                 return crc32c_le_base(crc, p, len);
@@ -60,8 +62,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
  
         return crc32c_le_arm64(crc, p, len);
  }
+EXPORT_SYMBOL(crc32c_le_arch);
  
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
  {
         if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
                 return crc32_be_base(crc, p, len);
@@ -80,3 +83,7 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
  
         return crc32_be_arm64(crc, p, len);
  }
+EXPORT_SYMBOL(crc32_be_arch);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("arm64-optimized CRC32 functions");
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig

index d4a7ca0388c071b536df59c0eb11d55f9080c7cd..7d5718667e3941597b90bac2584c34c9b62e748a 100644 (file)
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -24,6 +24,7 @@ config RISCV
         select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
         select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
         select ARCH_HAS_BINFMT_FLAT
+       select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
         select ARCH_HAS_CURRENT_STACK_POINTER
         select ARCH_HAS_DEBUG_VIRTUAL if MMU
         select ARCH_HAS_DEBUG_VM_PGTABLE
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile

index 8eec6b69a875f89c0b60be8a96c87882764b38b0..79368a895feed3094bd41251ac93f4dbbe478496 100644 (file)
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -15,8 +15,7 @@ endif
  lib-$(CONFIG_MMU)      += uaccess.o
  lib-$(CONFIG_64BIT)    += tishift.o
  lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
-lib-$(CONFIG_RISCV_ISA_ZBC)    += crc32.o
-
+obj-$(CONFIG_CRC32_ARCH)       += crc32-riscv.o
  obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
  lib-$(CONFIG_RISCV_ISA_V)      += xor.o
  lib-$(CONFIG_RISCV_ISA_V)      += riscv_v_helpers.o
diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c

new file mode 100644 (file)

index 0000000..a3ff7db
--- /dev/null
+++ b/arch/riscv/lib/crc32-riscv.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Accelerated CRC32 implementation with Zbc extension.
+ *
+ * Copyright (C) 2024 Intel Corporation
+ */
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+#include <asm/byteorder.h>
+
+#include <linux/types.h>
+#include <linux/minmax.h>
+#include <linux/crc32poly.h>
+#include <linux/crc32.h>
+#include <linux/byteorder/generic.h>
+#include <linux/module.h>
+
+/*
+ * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
+ * better understanding of how this math works.
+ *
+ * let "+" denote polynomial add (XOR)
+ * let "-" denote polynomial sub (XOR)
+ * let "*" denote polynomial multiplication
+ * let "/" denote polynomial floor division
+ * let "S" denote source data, XLEN bits wide
+ * let "P" denote CRC32 polynomial
+ * let "T" denote 2^(XLEN+32)
+ * let "QT" denote quotient of T/P, with the bit for 2^XLEN being implicit
+ *
+ * crc32(S, P)
+ * => S * (2^32) - S * (2^32) / P * P
+ * => lowest 32 bits of: S * (2^32) / P * P
+ * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
+ * => lowest 32 bits of: S * (2^32) * quotient / T * P
+ * => lowest 32 bits of: S * quotient / 2^XLEN * P
+ * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
+ * => clmul_low_part(clmul_high_part(S, QT) + S, P)
+ *
+ * In the implementations below, the BE case is more intuitive, since the
+ * higher-order bits sit at the more significant positions.
+ */
+
+#if __riscv_xlen == 64
+/* Slide by XLEN bits per iteration */
+# define STEP_ORDER 3
+
+/* Each polynomial quotient below has an implicit bit for 2^XLEN */
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
+# define CRC32_POLY_QT_LE      0x5a72d812fb808b20
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
+# define CRC32C_POLY_QT_LE     0xa434f61c6f5389f8
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format; it is the
+ * bit-reversed version of CRC32_POLY_QT_LE.
+ */
+# define CRC32_POLY_QT_BE      0x04d101df481b4e5a
+
+static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
+{
+       return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
+}
+
+static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
+{
+       u32 crc;
+
+       /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
+       asm volatile (".option push\n"
+                     ".option arch,+zbc\n"
+                     "clmul    %0, %1, %2\n"
+                     "slli     %0, %0, 1\n"
+                     "xor      %0, %0, %1\n"
+                     "clmulr   %0, %0, %3\n"
+                     "srli     %0, %0, 32\n"
+                     ".option pop\n"
+                     : "=&r" (crc)
+                     : "r" (s),
+                       "r" (poly_qt),
+                       "r" ((u64)poly << 32)
+                     :);
+       return crc;
+}
+
+static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
+{
+       return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
+}
+
+#elif __riscv_xlen == 32
+# define STEP_ORDER 2
+/* Each quotient is the RV64 analog's upper 32 bits for BE, lower 32 bits for bit-reversed LE */
+# define CRC32_POLY_QT_LE      0xfb808b20
+# define CRC32C_POLY_QT_LE     0x6f5389f8
+# define CRC32_POLY_QT_BE      0x04d101df
+
+static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
+{
+       return crc ^ (__force u32)__cpu_to_le32(*ptr);
+}
+
+static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
+{
+       u32 crc;
+
+       /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
+       asm volatile (".option push\n"
+                     ".option arch,+zbc\n"
+                     "clmul    %0, %1, %2\n"
+                     "slli     %0, %0, 1\n"
+                     "xor      %0, %0, %1\n"
+                     "clmulr   %0, %0, %3\n"
+                     ".option pop\n"
+                     : "=&r" (crc)
+                     : "r" (s),
+                       "r" (poly_qt),
+                       "r" (poly)
+                     :);
+       return crc;
+}
+
+static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
+{
+       return crc ^ (__force u32)__cpu_to_be32(*ptr);
+}
+
+#else
+# error "Unexpected __riscv_xlen"
+#endif
+
+static inline u32 crc32_be_zbc(unsigned long s)
+{
+       u32 crc;
+
+       asm volatile (".option push\n"
+                     ".option arch,+zbc\n"
+                     "clmulh   %0, %1, %2\n"
+                     "xor      %0, %0, %1\n"
+                     "clmul    %0, %0, %3\n"
+                     ".option pop\n"
+                     : "=&r" (crc)
+                     : "r" (s),
+                       "r" (CRC32_POLY_QT_BE),
+                       "r" (CRC32_POLY_BE)
+                     :);
+       return crc;
+}
+
+#define STEP           (1 << STEP_ORDER)
+#define OFFSET_MASK    (STEP - 1)
+
+typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
+
+static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
+                                    size_t len, u32 poly,
+                                    unsigned long poly_qt)
+{
+       size_t bits = len * 8;
+       unsigned long s = 0;
+       u32 crc_low = 0;
+
+       for (int i = 0; i < len; i++)
+               s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
+
+       s ^= (unsigned long)crc << (__riscv_xlen - bits);
+       if (__riscv_xlen == 32 || len < sizeof(u32))
+               crc_low = crc >> bits;
+
+       crc = crc32_le_zbc(s, poly, poly_qt);
+       crc ^= crc_low;
+
+       return crc;
+}
+
+static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
+                                         size_t len, u32 poly,
+                                         unsigned long poly_qt,
+                                         fallback crc_fb)
+{
+       size_t offset, head_len, tail_len;
+       unsigned long const *p_ul;
+       unsigned long s;
+
+       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+                            RISCV_ISA_EXT_ZBC, 1)
+                : : : : legacy);
+
+       /* Handle the unaligned head. */
+       offset = (unsigned long)p & OFFSET_MASK;
+       if (offset && len) {
+               head_len = min(STEP - offset, len);
+               crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
+               p += head_len;
+               len -= head_len;
+       }
+
+       tail_len = len & OFFSET_MASK;
+       len = len >> STEP_ORDER;
+       p_ul = (unsigned long const *)p;
+
+       for (int i = 0; i < len; i++) {
+               s = crc32_le_prep(crc, p_ul);
+               crc = crc32_le_zbc(s, poly, poly_qt);
+               p_ul++;
+       }
+
+       /* Handle the tail bytes. */
+       p = (unsigned char const *)p_ul;
+       if (tail_len)
+               crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
+
+       return crc;
+
+legacy:
+       return crc_fb(crc, p, len);
+}
+
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+       return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
+                               crc32_le_base);
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+       return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
+                               CRC32C_POLY_QT_LE, crc32c_le_base);
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
+                                    size_t len)
+{
+       size_t bits = len * 8;
+       unsigned long s = 0;
+       u32 crc_low = 0;
+
+       s = 0;
+       for (int i = 0; i < len; i++)
+               s = *p++ | (s << 8);
+
+       if (__riscv_xlen == 32 || len < sizeof(u32)) {
+               s ^= crc >> (32 - bits);
+               crc_low = crc << bits;
+       } else {
+               s ^= (unsigned long)crc << (bits - 32);
+       }
+
+       crc = crc32_be_zbc(s);
+       crc ^= crc_low;
+
+       return crc;
+}
+
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+       size_t offset, head_len, tail_len;
+       unsigned long const *p_ul;
+       unsigned long s;
+
+       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+                            RISCV_ISA_EXT_ZBC, 1)
+                : : : : legacy);
+
+       /* Handle the unaligned head. */
+       offset = (unsigned long)p & OFFSET_MASK;
+       if (offset && len) {
+               head_len = min(STEP - offset, len);
+               crc = crc32_be_unaligned(crc, p, head_len);
+               p += head_len;
+               len -= head_len;
+       }
+
+       tail_len = len & OFFSET_MASK;
+       len = len >> STEP_ORDER;
+       p_ul = (unsigned long const *)p;
+
+       for (int i = 0; i < len; i++) {
+               s = crc32_be_prep(crc, p_ul);
+               crc = crc32_be_zbc(s);
+               p_ul++;
+       }
+
+       /* Handle the tail bytes. */
+       p = (unsigned char const *)p_ul;
+       if (tail_len)
+               crc = crc32_be_unaligned(crc, p, tail_len);
+
+       return crc;
+
+legacy:
+       return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c

deleted file mode 100644 (file)

index 333fb7a..0000000
--- a/arch/riscv/lib/crc32.c
+++ /dev/null
@@ -1,294 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Accelerated CRC32 implementation with Zbc extension.
- *
- * Copyright (C) 2024 Intel Corporation
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <asm/byteorder.h>
-
-#include <linux/types.h>
-#include <linux/minmax.h>
-#include <linux/crc32poly.h>
-#include <linux/crc32.h>
-#include <linux/byteorder/generic.h>
-
-/*
- * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
- * better understanding of how this math works.
- *
- * let "+" denotes polynomial add (XOR)
- * let "-" denotes polynomial sub (XOR)
- * let "*" denotes polynomial multiplication
- * let "/" denotes polynomial floor division
- * let "S" denotes source data, XLEN bit wide
- * let "P" denotes CRC32 polynomial
- * let "T" denotes 2^(XLEN+32)
- * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit
- *
- * crc32(S, P)
- * => S * (2^32) - S * (2^32) / P * P
- * => lowest 32 bits of: S * (2^32) / P * P
- * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
- * => lowest 32 bits of: S * (2^32) * quotient / T * P
- * => lowest 32 bits of: S * quotient / 2^XLEN * P
- * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
- * => clmul_low_part(clmul_high_part(S, QT) + S, P)
- *
- * In terms of below implementations, the BE case is more intuitive, since the
- * higher order bit sits at more significant position.
- */
-
-#if __riscv_xlen == 64
-/* Slide by XLEN bits per iteration */
-# define STEP_ORDER 3
-
-/* Each below polynomial quotient has an implicit bit for 2^XLEN */
-
-/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
-# define CRC32_POLY_QT_LE      0x5a72d812fb808b20
-
-/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
-# define CRC32C_POLY_QT_LE     0xa434f61c6f5389f8
-
-/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be
- * the same as the bit-reversed version of CRC32_POLY_QT_LE
- */
-# define CRC32_POLY_QT_BE      0x04d101df481b4e5a
-
-static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
-{
-       return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
-}
-
-static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
-{
-       u32 crc;
-
-       /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
-       asm volatile (".option push\n"
-                     ".option arch,+zbc\n"
-                     "clmul    %0, %1, %2\n"
-                     "slli     %0, %0, 1\n"
-                     "xor      %0, %0, %1\n"
-                     "clmulr   %0, %0, %3\n"
-                     "srli     %0, %0, 32\n"
-                     ".option pop\n"
-                     : "=&r" (crc)
-                     : "r" (s),
-                       "r" (poly_qt),
-                       "r" ((u64)poly << 32)
-                     :);
-       return crc;
-}
-
-static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
-{
-       return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
-}
-
-#elif __riscv_xlen == 32
-# define STEP_ORDER 2
-/* Each quotient should match the upper half of its analog in RV64 */
-# define CRC32_POLY_QT_LE      0xfb808b20
-# define CRC32C_POLY_QT_LE     0x6f5389f8
-# define CRC32_POLY_QT_BE      0x04d101df
-
-static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
-{
-       return crc ^ (__force u32)__cpu_to_le32(*ptr);
-}
-
-static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
-{
-       u32 crc;
-
-       /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
-       asm volatile (".option push\n"
-                     ".option arch,+zbc\n"
-                     "clmul    %0, %1, %2\n"
-                     "slli     %0, %0, 1\n"
-                     "xor      %0, %0, %1\n"
-                     "clmulr   %0, %0, %3\n"
-                     ".option pop\n"
-                     : "=&r" (crc)
-                     : "r" (s),
-                       "r" (poly_qt),
-                       "r" (poly)
-                     :);
-       return crc;
-}
-
-static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
-{
-       return crc ^ (__force u32)__cpu_to_be32(*ptr);
-}
-
-#else
-# error "Unexpected __riscv_xlen"
-#endif
-
-static inline u32 crc32_be_zbc(unsigned long s)
-{
-       u32 crc;
-
-       asm volatile (".option push\n"
-                     ".option arch,+zbc\n"
-                     "clmulh   %0, %1, %2\n"
-                     "xor      %0, %0, %1\n"
-                     "clmul    %0, %0, %3\n"
-                     ".option pop\n"
-                     : "=&r" (crc)
-                     : "r" (s),
-                       "r" (CRC32_POLY_QT_BE),
-                       "r" (CRC32_POLY_BE)
-                     :);
-       return crc;
-}
-
-#define STEP           (1 << STEP_ORDER)
-#define OFFSET_MASK    (STEP - 1)
-
-typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
-
-static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
-                                    size_t len, u32 poly,
-                                    unsigned long poly_qt)
-{
-       size_t bits = len * 8;
-       unsigned long s = 0;
-       u32 crc_low = 0;
-
-       for (int i = 0; i < len; i++)
-               s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
-
-       s ^= (unsigned long)crc << (__riscv_xlen - bits);
-       if (__riscv_xlen == 32 || len < sizeof(u32))
-               crc_low = crc >> bits;
-
-       crc = crc32_le_zbc(s, poly, poly_qt);
-       crc ^= crc_low;
-
-       return crc;
-}
-
-static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
-                                         size_t len, u32 poly,
-                                         unsigned long poly_qt,
-                                         fallback crc_fb)
-{
-       size_t offset, head_len, tail_len;
-       unsigned long const *p_ul;
-       unsigned long s;
-
-       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
-                            RISCV_ISA_EXT_ZBC, 1)
-                : : : : legacy);
-
-       /* Handle the unaligned head. */
-       offset = (unsigned long)p & OFFSET_MASK;
-       if (offset && len) {
-               head_len = min(STEP - offset, len);
-               crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
-               p += head_len;
-               len -= head_len;
-       }
-
-       tail_len = len & OFFSET_MASK;
-       len = len >> STEP_ORDER;
-       p_ul = (unsigned long const *)p;
-
-       for (int i = 0; i < len; i++) {
-               s = crc32_le_prep(crc, p_ul);
-               crc = crc32_le_zbc(s, poly, poly_qt);
-               p_ul++;
-       }
-
-       /* Handle the tail bytes. */
-       p = (unsigned char const *)p_ul;
-       if (tail_len)
-               crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
-
-       return crc;
-
-legacy:
-       return crc_fb(crc, p, len);
-}
-
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
-{
-       return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
-                               crc32_le_base);
-}
-
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
-{
-       return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
-                               CRC32C_POLY_QT_LE, crc32c_le_base);
-}
-
-static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
-                                    size_t len)
-{
-       size_t bits = len * 8;
-       unsigned long s = 0;
-       u32 crc_low = 0;
-
-       s = 0;
-       for (int i = 0; i < len; i++)
-               s = *p++ | (s << 8);
-
-       if (__riscv_xlen == 32 || len < sizeof(u32)) {
-               s ^= crc >> (32 - bits);
-               crc_low = crc << bits;
-       } else {
-               s ^= (unsigned long)crc << (bits - 32);
-       }
-
-       crc = crc32_be_zbc(s);
-       crc ^= crc_low;
-
-       return crc;
-}
-
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
-{
-       size_t offset, head_len, tail_len;
-       unsigned long const *p_ul;
-       unsigned long s;
-
-       asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
-                            RISCV_ISA_EXT_ZBC, 1)
-                : : : : legacy);
-
-       /* Handle the unaligned head. */
-       offset = (unsigned long)p & OFFSET_MASK;
-       if (offset && len) {
-               head_len = min(STEP - offset, len);
-               crc = crc32_be_unaligned(crc, p, head_len);
-               p += head_len;
-               len -= head_len;
-       }
-
-       tail_len = len & OFFSET_MASK;
-       len = len >> STEP_ORDER;
-       p_ul = (unsigned long const *)p;
-
-       for (int i = 0; i < len; i++) {
-               s = crc32_be_prep(crc, p_ul);
-               crc = crc32_be_zbc(s);
-               p_ul++;
-       }
-
-       /* Handle the tail bytes. */
-       p = (unsigned char const *)p_ul;
-       if (tail_len)
-               crc = crc32_be_unaligned(crc, p, tail_len);
-
-       return crc;
-
-legacy:
-       return crc32_be_base(crc, p, len);
-}
diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c

index 6a55d206fab31714e778adca6fb482c68c76349c..cc064ea8240e3c9ed7fe0f959906ee0108057cc7 100644 (file)
--- a/crypto/crc32_generic.c
+++ b/crypto/crc32_generic.c
@@ -160,12 +160,12 @@ static struct shash_alg algs[] = {{
  static int __init crc32_mod_init(void)
  {
         /* register the arch flavor only if it differs from the generic one */
-       return crypto_register_shashes(algs, 1 + (&crc32_le != &crc32_le_base));
+       return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
  }
  
  static void __exit crc32_mod_fini(void)
  {
-       crypto_unregister_shashes(algs, 1 + (&crc32_le != &crc32_le_base));
+       crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
  }
  
  subsys_initcall(crc32_mod_init);
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c

index 635599b255ec04afd5880687fdda5c56591b6f3c..04b03d825cf4562b122b272a043bfcb2f0c47116 100644 (file)
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c_generic.c
@@ -200,12 +200,12 @@ static struct shash_alg algs[] = {{
  static int __init crc32c_mod_init(void)
  {
         /* register the arch flavor only if it differs from the generic one */
-       return crypto_register_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
+       return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
  }
  
  static void __exit crc32c_mod_fini(void)
  {
-       crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
+       crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
  }
  
  subsys_initcall(crc32c_mod_init);
diff --git a/include/linux/crc32.h b/include/linux/crc32.h

index 5b07fc9081c474753253bba1262e76b22e53fb4b..58c632533b086170105513ca48982b83f9ebecc2 100644 (file)
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -8,10 +8,34 @@
  #include <linux/types.h>
  #include <linux/bitrev.h>
  
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len);
+
+static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len)
+{
+       if (IS_ENABLED(CONFIG_CRC32_ARCH))
+               return crc32_le_arch(crc, p, len);
+       return crc32_le_base(crc, p, len);
+}
+
+static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len)
+{
+       if (IS_ENABLED(CONFIG_CRC32_ARCH))
+               return crc32_be_arch(crc, p, len);
+       return crc32_be_base(crc, p, len);
+}
+
+/* TODO: leading underscores should be dropped once callers have been updated */
+static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
+{
+       if (IS_ENABLED(CONFIG_CRC32_ARCH))
+               return crc32c_le_arch(crc, p, len);
+       return crc32c_le_base(crc, p, len);
+}
  
  /**
   * crc32_le_combine - Combine two crc32 check values into one. For two
@@ -38,9 +62,6 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
         return crc32_le_shift(crc1, len2) ^ crc2;
  }
  
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len);
-
  /**
   * __crc32c_le_combine - Combine two crc32c check values into one. For two
   *                      sequences of bytes, seq1 and seq2 with lengths len1
diff --git a/lib/Kconfig b/lib/Kconfig

index 5a318f753b2f44cb0a7905cc0092e81c133bc112..8858030bebaea1e521862f6124bbfb16f09793e8 100644 (file)
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -190,6 +190,9 @@ config CRC32
           the kernel tree does. Such modules that use library CRC32/CRC32c
           functions require M here.
  
+config ARCH_HAS_CRC32
+       bool
+
  config CRC32_SELFTEST
         tristate "CRC32 perform self test on init"
         depends on CRC32
@@ -202,24 +205,39 @@ config CRC32_SELFTEST
  choice
         prompt "CRC32 implementation"
         depends on CRC32
-       default CRC32_SLICEBY8
+       default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32
+       default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32
         help
-         This option allows a kernel builder to override the default choice
-         of CRC32 algorithm.  Choose the default ("slice by 8") unless you
-         know that you need one of the others.
+         This option allows you to override the default choice of CRC32
+         implementation.  Choose the default unless you know that you need one
+         of the others.
  
-config CRC32_SLICEBY8
+config CRC32_IMPL_ARCH_PLUS_SLICEBY8
+       bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32
+       help
+         Use the architecture-optimized implementation of CRC32.  Fall back to
+         slice-by-8 in cases where the arch-optimized implementation cannot be
+         used, e.g. if the CPU lacks support for the needed instructions.
+
+         This is the default when an arch-optimized implementation exists.
+
+config CRC32_IMPL_ARCH_PLUS_SLICEBY1
+       bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32
+       help
+         Use the architecture-optimized implementation of CRC32, but fall back to
+         slice-by-1 instead of slice-by-8 in order to reduce the binary size.
+
+config CRC32_IMPL_SLICEBY8
         bool "Slice by 8 bytes"
         help
           Calculate checksum 8 bytes at a time with a clever slicing algorithm.
-         This is the fastest algorithm, but comes with a 8KiB lookup table.
-         Most modern processors have enough cache to hold this table without
-         thrashing the cache.
-
-         This is the default implementation choice.  Choose this one unless
-         you have a good reason not to.
+         This is much slower than the architecture-optimized implementation of
+         CRC32 (if the selected arch has one), but it is portable and is the
+         fastest implementation when no arch-optimized implementation is
+         available.  It uses an 8KiB lookup table.  Most modern processors have
+         enough cache to hold this table without thrashing the cache.
  
-config CRC32_SLICEBY4
+config CRC32_IMPL_SLICEBY4
         bool "Slice by 4 bytes"
         help
           Calculate checksum 4 bytes at a time with a clever slicing algorithm.
@@ -228,15 +246,15 @@ config CRC32_SLICEBY4
  
           Only choose this option if you know what you are doing.
  
-config CRC32_SARWATE
-       bool "Sarwate's Algorithm (one byte at a time)"
+config CRC32_IMPL_SLICEBY1
+       bool "Slice by 1 byte (Sarwate's algorithm)"
         help
           Calculate checksum a byte at a time using Sarwate's algorithm.  This
-         is not particularly fast, but has a small 256 byte lookup table.
+         is not particularly fast, but has a small 1KiB lookup table.
  
           Only choose this option if you know what you are doing.
  
-config CRC32_BIT
+config CRC32_IMPL_BIT
         bool "Classic Algorithm (one bit at a time)"
         help
           Calculate checksum one bit at a time.  This is VERY slow, but has
@@ -246,6 +264,26 @@ config CRC32_BIT
  
  endchoice
  
+config CRC32_ARCH
+       tristate
+       default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
+
+config CRC32_SLICEBY8
+       bool
+       default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8
+
+config CRC32_SLICEBY4
+       bool
+       default y if CRC32_IMPL_SLICEBY4
+
+config CRC32_SARWATE
+       bool
+       default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
+
+config CRC32_BIT
+       bool
+       default y if CRC32_IMPL_BIT
+
  config CRC64
         tristate "CRC64 functions"
         help
diff --git a/lib/crc32.c b/lib/crc32.c

index c67059b0082b41f48cb70d86964b8286c738d1ac..47151624332ef4e9c27f54c65278d0ada19d0fdc 100644 (file)
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -183,35 +183,27 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
  }
  
  #if CRC_LE_BITS == 1
-u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
  {
         return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
  }
-u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
  {
         return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
  }
  #else
-u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
  {
         return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE);
  }
-u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
  {
         return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
  }
  #endif
-EXPORT_SYMBOL(crc32_le);
-EXPORT_SYMBOL(__crc32c_le);
-
-u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
  EXPORT_SYMBOL(crc32_le_base);
-
-u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
  EXPORT_SYMBOL(crc32c_le_base);
  
-u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
-
  /*
   * This multiplies the polynomials x and y modulo the given modulus.
   * This follows the "little-endian" CRC convention that the lsbit
@@ -335,14 +327,14 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
  }
  
  #if CRC_BE_BITS == 1
-u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
  {
         return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
  }
  #else
-u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
  {
         return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
  }
  #endif
-EXPORT_SYMBOL(crc32_be);
+EXPORT_SYMBOL(crc32_be_base);
author	Eric Biggers <ebiggers@google.com>
	Mon, 2 Dec 2024 01:08:27 +0000 (17:08 -0800)
committer	Eric Biggers <ebiggers@google.com>
	Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
arch/arm64/Kconfig		patch \| blob \| blame \| history
arch/arm64/lib/Makefile		patch \| blob \| blame \| history
arch/arm64/lib/crc32-glue.c		patch \| blob \| blame \| history
arch/riscv/Kconfig		patch \| blob \| blame \| history
arch/riscv/lib/Makefile		patch \| blob \| blame \| history
arch/riscv/lib/crc32-riscv.c	[new file with mode: 0644]	patch \| blob
arch/riscv/lib/crc32.c	[deleted file]	patch \| blob \| blame \| history
crypto/crc32_generic.c		patch \| blob \| blame \| history
crypto/crc32c_generic.c		patch \| blob \| blame \| history
include/linux/crc32.h		patch \| blob \| blame \| history
lib/Kconfig		patch \| blob \| blame \| history
lib/crc32.c		patch \| blob \| blame \| history