select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_CC_PLATFORM
+ select ARCH_HAS_CRC32
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
-obj-$(CONFIG_CRC32) += crc32.o crc32-glue.o
+obj-$(CONFIG_CRC32_ARCH) += crc32-arm64.o
+crc32-arm64-y := crc32.o crc32-glue.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
#include <linux/crc32.h>
#include <linux/linkage.h>
+#include <linux/module.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
return crc32_le_base(crc, p, len);
return crc32_le_arm64(crc, p, len);
}
+EXPORT_SYMBOL(crc32_le_arch);
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
return crc32c_le_base(crc, p, len);
return crc32c_le_arm64(crc, p, len);
}
+EXPORT_SYMBOL(crc32c_le_arch);
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
{
if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
return crc32_be_base(crc, p, len);
return crc32_be_arm64(crc, p, len);
}
+EXPORT_SYMBOL(crc32_be_arch);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("arm64-optimized CRC32 functions");
select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_HAS_BINFMT_FLAT
+ select ARCH_HAS_CRC32 if RISCV_ISA_ZBC
select ARCH_HAS_CURRENT_STACK_POINTER
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_VM_PGTABLE
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
-lib-$(CONFIG_RISCV_ISA_ZBC) += crc32.o
-
+obj-$(CONFIG_CRC32_ARCH) += crc32-riscv.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
lib-$(CONFIG_RISCV_ISA_V) += xor.o
lib-$(CONFIG_RISCV_ISA_V) += riscv_v_helpers.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Accelerated CRC32 implementation with Zbc extension.
+ *
+ * Copyright (C) 2024 Intel Corporation
+ */
+
+#include <asm/hwcap.h>
+#include <asm/alternative-macros.h>
+#include <asm/byteorder.h>
+
+#include <linux/types.h>
+#include <linux/minmax.h>
+#include <linux/crc32poly.h>
+#include <linux/crc32.h>
+#include <linux/byteorder/generic.h>
+#include <linux/module.h>
+
+/*
+ * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for a
+ * better understanding of how this math works.
+ *
+ * let "+" denote polynomial add (XOR)
+ * let "-" denote polynomial sub (XOR)
+ * let "*" denote polynomial multiplication
+ * let "/" denote polynomial floor division
+ * let "S" denote the source data, XLEN bits wide
+ * let "P" denote the CRC32 polynomial
+ * let "T" denote 2^(XLEN+32)
+ * let "QT" denote the quotient of T/P, with the bit for 2^XLEN being implicit
+ *
+ * crc32(S, P)
+ * => S * (2^32) - S * (2^32) / P * P
+ * => lowest 32 bits of: S * (2^32) / P * P
+ * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
+ * => lowest 32 bits of: S * (2^32) * quotient / T * P
+ * => lowest 32 bits of: S * quotient / 2^XLEN * P
+ * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
+ * => clmul_low_part(clmul_high_part(S, QT) + S, P)
+ *
+ * In the implementations below, the BE case is more intuitive, since the
+ * higher-order bit sits at the more significant position.
+ */
+
+#if __riscv_xlen == 64
+/* Slide by XLEN bits per iteration */
+# define STEP_ORDER 3
+
+/* Each polynomial quotient below has an implicit bit for 2^XLEN */
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
+# define CRC32_POLY_QT_LE 0x5a72d812fb808b20
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
+# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format; it should be
+ * the same as the bit-reversed version of CRC32_POLY_QT_LE
+ */
+# define CRC32_POLY_QT_BE 0x04d101df481b4e5a
+
+static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
+{
+ return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
+}
+
+static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
+{
+ u32 crc;
+
+ /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
+ asm volatile (".option push\n"
+ ".option arch,+zbc\n"
+ "clmul %0, %1, %2\n"
+ "slli %0, %0, 1\n"
+ "xor %0, %0, %1\n"
+ "clmulr %0, %0, %3\n"
+ "srli %0, %0, 32\n"
+ ".option pop\n"
+ : "=&r" (crc)
+ : "r" (s),
+ "r" (poly_qt),
+ "r" ((u64)poly << 32)
+ :);
+ return crc;
+}
+
+static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
+{
+ return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
+}
+
+#elif __riscv_xlen == 32
+# define STEP_ORDER 2
+/* Each quotient should match the upper half of its analog in RV64 */
+# define CRC32_POLY_QT_LE 0xfb808b20
+# define CRC32C_POLY_QT_LE 0x6f5389f8
+# define CRC32_POLY_QT_BE 0x04d101df
+
+static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
+{
+ return crc ^ (__force u32)__cpu_to_le32(*ptr);
+}
+
+static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
+{
+ u32 crc;
+
+ /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
+ asm volatile (".option push\n"
+ ".option arch,+zbc\n"
+ "clmul %0, %1, %2\n"
+ "slli %0, %0, 1\n"
+ "xor %0, %0, %1\n"
+ "clmulr %0, %0, %3\n"
+ ".option pop\n"
+ : "=&r" (crc)
+ : "r" (s),
+ "r" (poly_qt),
+ "r" (poly)
+ :);
+ return crc;
+}
+
+static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
+{
+ return crc ^ (__force u32)__cpu_to_be32(*ptr);
+}
+
+#else
+# error "Unexpected __riscv_xlen"
+#endif
+
+static inline u32 crc32_be_zbc(unsigned long s)
+{
+ u32 crc;
+
+ asm volatile (".option push\n"
+ ".option arch,+zbc\n"
+ "clmulh %0, %1, %2\n"
+ "xor %0, %0, %1\n"
+ "clmul %0, %0, %3\n"
+ ".option pop\n"
+ : "=&r" (crc)
+ : "r" (s),
+ "r" (CRC32_POLY_QT_BE),
+ "r" (CRC32_POLY_BE)
+ :);
+ return crc;
+}
+
+#define STEP (1 << STEP_ORDER)
+#define OFFSET_MASK (STEP - 1)
+
+typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
+
+static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
+ size_t len, u32 poly,
+ unsigned long poly_qt)
+{
+ size_t bits = len * 8;
+ unsigned long s = 0;
+ u32 crc_low = 0;
+
+ for (int i = 0; i < len; i++)
+ s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
+
+ s ^= (unsigned long)crc << (__riscv_xlen - bits);
+ if (__riscv_xlen == 32 || len < sizeof(u32))
+ crc_low = crc >> bits;
+
+ crc = crc32_le_zbc(s, poly, poly_qt);
+ crc ^= crc_low;
+
+ return crc;
+}
+
+static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
+ size_t len, u32 poly,
+ unsigned long poly_qt,
+ fallback crc_fb)
+{
+ size_t offset, head_len, tail_len;
+ unsigned long const *p_ul;
+ unsigned long s;
+
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ RISCV_ISA_EXT_ZBC, 1)
+ : : : : legacy);
+
+ /* Handle the unaligned head. */
+ offset = (unsigned long)p & OFFSET_MASK;
+ if (offset && len) {
+ head_len = min(STEP - offset, len);
+ crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
+ p += head_len;
+ len -= head_len;
+ }
+
+ tail_len = len & OFFSET_MASK;
+ len = len >> STEP_ORDER;
+ p_ul = (unsigned long const *)p;
+
+ for (int i = 0; i < len; i++) {
+ s = crc32_le_prep(crc, p_ul);
+ crc = crc32_le_zbc(s, poly, poly_qt);
+ p_ul++;
+ }
+
+ /* Handle the tail bytes. */
+ p = (unsigned char const *)p_ul;
+ if (tail_len)
+ crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
+
+ return crc;
+
+legacy:
+ return crc_fb(crc, p, len);
+}
+
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
+ crc32_le_base);
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+ return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
+ CRC32C_POLY_QT_LE, crc32c_le_base);
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
+ size_t len)
+{
+ size_t bits = len * 8;
+ unsigned long s = 0;
+ u32 crc_low = 0;
+
+ for (int i = 0; i < len; i++)
+ s = *p++ | (s << 8);
+
+ if (__riscv_xlen == 32 || len < sizeof(u32)) {
+ s ^= crc >> (32 - bits);
+ crc_low = crc << bits;
+ } else {
+ s ^= (unsigned long)crc << (bits - 32);
+ }
+
+ crc = crc32_be_zbc(s);
+ crc ^= crc_low;
+
+ return crc;
+}
+
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+ size_t offset, head_len, tail_len;
+ unsigned long const *p_ul;
+ unsigned long s;
+
+ asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
+ RISCV_ISA_EXT_ZBC, 1)
+ : : : : legacy);
+
+ /* Handle the unaligned head. */
+ offset = (unsigned long)p & OFFSET_MASK;
+ if (offset && len) {
+ head_len = min(STEP - offset, len);
+ crc = crc32_be_unaligned(crc, p, head_len);
+ p += head_len;
+ len -= head_len;
+ }
+
+ tail_len = len & OFFSET_MASK;
+ len = len >> STEP_ORDER;
+ p_ul = (unsigned long const *)p;
+
+ for (int i = 0; i < len; i++) {
+ s = crc32_be_prep(crc, p_ul);
+ crc = crc32_be_zbc(s);
+ p_ul++;
+ }
+
+ /* Handle the tail bytes. */
+ p = (unsigned char const *)p_ul;
+ if (tail_len)
+ crc = crc32_be_unaligned(crc, p, tail_len);
+
+ return crc;
+
+legacy:
+ return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension");
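The Barrett-reduction derivation in the comment at the top of the new file is easier to follow next to a plain-C rendering of one folding step. The sketch below is illustration only and is not part of the patch: clmul64_low()/clmul64_high() are hand-rolled stand-ins for the Zbc clmul/clmulh instructions, and CRC32_POLY_QT_BE_RV64 simply repeats the RV64 constant defined above. It mirrors what crc32_be_zbc() does for one XLEN=64 chunk.

#include <linux/types.h>
#include <linux/crc32poly.h>	/* CRC32_POLY_BE */

#define CRC32_POLY_QT_BE_RV64	0x04d101df481b4e5aULL	/* same value as above */

/* Carry-less multiply: low and high 64 bits of the 128-bit product. */
static u64 clmul64_low(u64 a, u64 b)
{
	u64 r = 0;

	for (int i = 0; i < 64; i++)
		if (b & (1ULL << i))
			r ^= a << i;
	return r;
}

static u64 clmul64_high(u64 a, u64 b)
{
	u64 r = 0;

	for (int i = 1; i < 64; i++)
		if (b & (1ULL << i))
			r ^= a >> (64 - i);
	return r;
}

/*
 * One big-endian folding step, mirroring crc32_be_zbc(): the quotient is
 * clmul_high(S, QT) + S (the "+ S" accounts for QT's implicit 2^64 bit),
 * and the low 32 bits of quotient * P give the folded CRC.
 */
static u32 crc32_be_fold64(u64 s)
{
	u64 q = clmul64_high(s, CRC32_POLY_QT_BE_RV64) ^ s;

	return (u32)clmul64_low(q, CRC32_POLY_BE);
}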
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Accelerated CRC32 implementation with Zbc extension.
- *
- * Copyright (C) 2024 Intel Corporation
- */
-
-#include <asm/hwcap.h>
-#include <asm/alternative-macros.h>
-#include <asm/byteorder.h>
-
-#include <linux/types.h>
-#include <linux/minmax.h>
-#include <linux/crc32poly.h>
-#include <linux/crc32.h>
-#include <linux/byteorder/generic.h>
-
-/*
- * Refer to https://www.corsix.org/content/barrett-reduction-polynomials for
- * better understanding of how this math works.
- *
- * let "+" denotes polynomial add (XOR)
- * let "-" denotes polynomial sub (XOR)
- * let "*" denotes polynomial multiplication
- * let "/" denotes polynomial floor division
- * let "S" denotes source data, XLEN bit wide
- * let "P" denotes CRC32 polynomial
- * let "T" denotes 2^(XLEN+32)
- * let "QT" denotes quotient of T/P, with the bit for 2^XLEN being implicit
- *
- * crc32(S, P)
- * => S * (2^32) - S * (2^32) / P * P
- * => lowest 32 bits of: S * (2^32) / P * P
- * => lowest 32 bits of: S * (2^32) * (T / P) / T * P
- * => lowest 32 bits of: S * (2^32) * quotient / T * P
- * => lowest 32 bits of: S * quotient / 2^XLEN * P
- * => lowest 32 bits of: (clmul_high_part(S, QT) + S) * P
- * => clmul_low_part(clmul_high_part(S, QT) + S, P)
- *
- * In terms of below implementations, the BE case is more intuitive, since the
- * higher order bit sits at more significant position.
- */
-
-#if __riscv_xlen == 64
-/* Slide by XLEN bits per iteration */
-# define STEP_ORDER 3
-
-/* Each below polynomial quotient has an implicit bit for 2^XLEN */
-
-/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in LE format */
-# define CRC32_POLY_QT_LE 0x5a72d812fb808b20
-
-/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
-# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8
-
-/* Polynomial quotient of (2^(XLEN+32))/CRC32_POLY, in BE format, it should be
- * the same as the bit-reversed version of CRC32_POLY_QT_LE
- */
-# define CRC32_POLY_QT_BE 0x04d101df481b4e5a
-
-static inline u64 crc32_le_prep(u32 crc, unsigned long const *ptr)
-{
- return (u64)crc ^ (__force u64)__cpu_to_le64(*ptr);
-}
-
-static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
-{
- u32 crc;
-
- /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
- asm volatile (".option push\n"
- ".option arch,+zbc\n"
- "clmul %0, %1, %2\n"
- "slli %0, %0, 1\n"
- "xor %0, %0, %1\n"
- "clmulr %0, %0, %3\n"
- "srli %0, %0, 32\n"
- ".option pop\n"
- : "=&r" (crc)
- : "r" (s),
- "r" (poly_qt),
- "r" ((u64)poly << 32)
- :);
- return crc;
-}
-
-static inline u64 crc32_be_prep(u32 crc, unsigned long const *ptr)
-{
- return ((u64)crc << 32) ^ (__force u64)__cpu_to_be64(*ptr);
-}
-
-#elif __riscv_xlen == 32
-# define STEP_ORDER 2
-/* Each quotient should match the upper half of its analog in RV64 */
-# define CRC32_POLY_QT_LE 0xfb808b20
-# define CRC32C_POLY_QT_LE 0x6f5389f8
-# define CRC32_POLY_QT_BE 0x04d101df
-
-static inline u32 crc32_le_prep(u32 crc, unsigned long const *ptr)
-{
- return crc ^ (__force u32)__cpu_to_le32(*ptr);
-}
-
-static inline u32 crc32_le_zbc(unsigned long s, u32 poly, unsigned long poly_qt)
-{
- u32 crc;
-
- /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
- asm volatile (".option push\n"
- ".option arch,+zbc\n"
- "clmul %0, %1, %2\n"
- "slli %0, %0, 1\n"
- "xor %0, %0, %1\n"
- "clmulr %0, %0, %3\n"
- ".option pop\n"
- : "=&r" (crc)
- : "r" (s),
- "r" (poly_qt),
- "r" (poly)
- :);
- return crc;
-}
-
-static inline u32 crc32_be_prep(u32 crc, unsigned long const *ptr)
-{
- return crc ^ (__force u32)__cpu_to_be32(*ptr);
-}
-
-#else
-# error "Unexpected __riscv_xlen"
-#endif
-
-static inline u32 crc32_be_zbc(unsigned long s)
-{
- u32 crc;
-
- asm volatile (".option push\n"
- ".option arch,+zbc\n"
- "clmulh %0, %1, %2\n"
- "xor %0, %0, %1\n"
- "clmul %0, %0, %3\n"
- ".option pop\n"
- : "=&r" (crc)
- : "r" (s),
- "r" (CRC32_POLY_QT_BE),
- "r" (CRC32_POLY_BE)
- :);
- return crc;
-}
-
-#define STEP (1 << STEP_ORDER)
-#define OFFSET_MASK (STEP - 1)
-
-typedef u32 (*fallback)(u32 crc, unsigned char const *p, size_t len);
-
-static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
- size_t len, u32 poly,
- unsigned long poly_qt)
-{
- size_t bits = len * 8;
- unsigned long s = 0;
- u32 crc_low = 0;
-
- for (int i = 0; i < len; i++)
- s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
-
- s ^= (unsigned long)crc << (__riscv_xlen - bits);
- if (__riscv_xlen == 32 || len < sizeof(u32))
- crc_low = crc >> bits;
-
- crc = crc32_le_zbc(s, poly, poly_qt);
- crc ^= crc_low;
-
- return crc;
-}
-
-static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
- size_t len, u32 poly,
- unsigned long poly_qt,
- fallback crc_fb)
-{
- size_t offset, head_len, tail_len;
- unsigned long const *p_ul;
- unsigned long s;
-
- asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
- RISCV_ISA_EXT_ZBC, 1)
- : : : : legacy);
-
- /* Handle the unaligned head. */
- offset = (unsigned long)p & OFFSET_MASK;
- if (offset && len) {
- head_len = min(STEP - offset, len);
- crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
- p += head_len;
- len -= head_len;
- }
-
- tail_len = len & OFFSET_MASK;
- len = len >> STEP_ORDER;
- p_ul = (unsigned long const *)p;
-
- for (int i = 0; i < len; i++) {
- s = crc32_le_prep(crc, p_ul);
- crc = crc32_le_zbc(s, poly, poly_qt);
- p_ul++;
- }
-
- /* Handle the tail bytes. */
- p = (unsigned char const *)p_ul;
- if (tail_len)
- crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
-
- return crc;
-
-legacy:
- return crc_fb(crc, p, len);
-}
-
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
-{
- return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
- crc32_le_base);
-}
-
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
-{
- return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
- CRC32C_POLY_QT_LE, crc32c_le_base);
-}
-
-static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
- size_t len)
-{
- size_t bits = len * 8;
- unsigned long s = 0;
- u32 crc_low = 0;
-
- s = 0;
- for (int i = 0; i < len; i++)
- s = *p++ | (s << 8);
-
- if (__riscv_xlen == 32 || len < sizeof(u32)) {
- s ^= crc >> (32 - bits);
- crc_low = crc << bits;
- } else {
- s ^= (unsigned long)crc << (bits - 32);
- }
-
- crc = crc32_be_zbc(s);
- crc ^= crc_low;
-
- return crc;
-}
-
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
-{
- size_t offset, head_len, tail_len;
- unsigned long const *p_ul;
- unsigned long s;
-
- asm goto(ALTERNATIVE("j %l[legacy]", "nop", 0,
- RISCV_ISA_EXT_ZBC, 1)
- : : : : legacy);
-
- /* Handle the unaligned head. */
- offset = (unsigned long)p & OFFSET_MASK;
- if (offset && len) {
- head_len = min(STEP - offset, len);
- crc = crc32_be_unaligned(crc, p, head_len);
- p += head_len;
- len -= head_len;
- }
-
- tail_len = len & OFFSET_MASK;
- len = len >> STEP_ORDER;
- p_ul = (unsigned long const *)p;
-
- for (int i = 0; i < len; i++) {
- s = crc32_be_prep(crc, p_ul);
- crc = crc32_be_zbc(s);
- p_ul++;
- }
-
- /* Handle the tail bytes. */
- p = (unsigned char const *)p_ul;
- if (tail_len)
- crc = crc32_be_unaligned(crc, p, tail_len);
-
- return crc;
-
-legacy:
- return crc32_be_base(crc, p, len);
-}
static int __init crc32_mod_init(void)
{
/* register the arch flavor only if it differs from the generic one */
- return crypto_register_shashes(algs, 1 + (&crc32_le != &crc32_le_base));
+ return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
}
static void __exit crc32_mod_fini(void)
{
- crypto_unregister_shashes(algs, 1 + (&crc32_le != &crc32_le_base));
+ crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
}
subsys_initcall(crc32_mod_init);
static int __init crc32c_mod_init(void)
{
/* register the arch flavor only if it differs from the generic one */
- return crypto_register_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
+ return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
}
static void __exit crc32c_mod_fini(void)
{
- crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &crc32c_le_base));
+ crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH));
}
subsys_initcall(crc32c_mod_init);
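The registration count here relies on IS_ENABLED() folding to a compile-time 0 or 1, replacing the old run-time comparison of function addresses. A minimal illustration (nr_crc32_shashes() is a made-up helper, not part of the patch):

#include <linux/kconfig.h>

/*
 * IS_ENABLED(CONFIG_CRC32_ARCH) is 1 when CRC32_ARCH is y or m and 0
 * otherwise, so one shash (generic) or two (generic plus the arch-backed
 * flavor) are registered without probing symbol addresses at run time.
 */
static inline unsigned int nr_crc32_shashes(void)
{
	return 1 + IS_ENABLED(CONFIG_CRC32_ARCH);
}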
#include <linux/types.h>
#include <linux/bitrev.h>
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len);
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len);
+
+static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len)
+{
+ if (IS_ENABLED(CONFIG_CRC32_ARCH))
+ return crc32_le_arch(crc, p, len);
+ return crc32_le_base(crc, p, len);
+}
+
+static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len)
+{
+ if (IS_ENABLED(CONFIG_CRC32_ARCH))
+ return crc32_be_arch(crc, p, len);
+ return crc32_be_base(crc, p, len);
+}
+
+/* TODO: leading underscores should be dropped once callers have been updated */
+static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
+{
+ if (IS_ENABLED(CONFIG_CRC32_ARCH))
+ return crc32c_le_arch(crc, p, len);
+ return crc32c_le_base(crc, p, len);
+}
/**
* crc32_le_combine - Combine two crc32 check values into one. For two
return crc32_le_shift(crc1, len2) ^ crc2;
}
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len);
-u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len);
-
/**
* __crc32c_le_combine - Combine two crc32c check values into one. For two
* sequences of bytes, seq1 and seq2 with lengths len1
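With these static inline wrappers, existing callers keep calling crc32_le(), crc32_be() and __crc32c_le() unchanged; whether they reach the *_arch or *_base implementation is decided at compile time by CONFIG_CRC32_ARCH. A throwaway test module, shown only to illustrate the unchanged calling convention, might look like this:

#include <linux/crc32.h>
#include <linux/module.h>
#include <linux/printk.h>

static int __init crc32_dispatch_demo_init(void)
{
	static const u8 check[] = "123456789";
	u32 crc = crc32_le(~0, check, sizeof(check) - 1) ^ ~0;

	/* 0xcbf43926 is the standard CRC-32 check value for "123456789". */
	pr_info("crc32_le(\"123456789\") = 0x%08x\n", crc);
	return 0;
}

static void __exit crc32_dispatch_demo_exit(void)
{
}

module_init(crc32_dispatch_demo_init);
module_exit(crc32_dispatch_demo_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("crc32_le() dispatch demo");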
the kernel tree does. Such modules that use library CRC32/CRC32c
functions require M here.
+config ARCH_HAS_CRC32
+ bool
+
config CRC32_SELFTEST
tristate "CRC32 perform self test on init"
depends on CRC32
choice
prompt "CRC32 implementation"
depends on CRC32
- default CRC32_SLICEBY8
+ default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32
+ default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32
help
- This option allows a kernel builder to override the default choice
- of CRC32 algorithm. Choose the default ("slice by 8") unless you
- know that you need one of the others.
+ This option allows you to override the default choice of CRC32
+ implementation. Choose the default unless you know that you need one
+ of the others.
-config CRC32_SLICEBY8
+config CRC32_IMPL_ARCH_PLUS_SLICEBY8
+ bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32
+ help
+ Use architecture-optimized implementation of CRC32. Fall back to
+ slice-by-8 in cases where the arch-optimized implementation cannot be
+ used, e.g. if the CPU lacks support for the needed instructions.
+
+ This is the default when an arch-optimized implementation exists.
+
+config CRC32_IMPL_ARCH_PLUS_SLICEBY1
+ bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32
+ help
+ Use architecture-optimized implementation of CRC32, but fall back to
+ slice-by-1 instead of slice-by-8 in order to reduce the binary size.
+
+config CRC32_IMPL_SLICEBY8
bool "Slice by 8 bytes"
help
Calculate checksum 8 bytes at a time with a clever slicing algorithm.
- This is the fastest algorithm, but comes with a 8KiB lookup table.
- Most modern processors have enough cache to hold this table without
- thrashing the cache.
-
- This is the default implementation choice. Choose this one unless
- you have a good reason not to.
+ This is much slower than the architecture-optimized implementation of
+ CRC32 (if the selected arch has one), but it is portable and is the
+ fastest implementation when no arch-optimized implementation is
+ available. It uses an 8KiB lookup table. Most modern processors have
+ enough cache to hold this table without thrashing the cache.
-config CRC32_SLICEBY4
+config CRC32_IMPL_SLICEBY4
bool "Slice by 4 bytes"
help
Calculate checksum 4 bytes at a time with a clever slicing algorithm.
Only choose this option if you know what you are doing.
-config CRC32_SARWATE
- bool "Sarwate's Algorithm (one byte at a time)"
+config CRC32_IMPL_SLICEBY1
+ bool "Slice by 1 byte (Sarwate's algorithm)"
help
Calculate checksum a byte at a time using Sarwate's algorithm. This
- is not particularly fast, but has a small 256 byte lookup table.
+ is not particularly fast, but has a small 1KiB lookup table.
Only choose this option if you know what you are doing.
-config CRC32_BIT
+config CRC32_IMPL_BIT
bool "Classic Algorithm (one bit at a time)"
help
Calculate checksum one bit at a time. This is VERY slow, but has
endchoice
+config CRC32_ARCH
+ tristate
+ default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
+
+config CRC32_SLICEBY8
+ bool
+ default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8
+
+config CRC32_SLICEBY4
+ bool
+ default y if CRC32_IMPL_SLICEBY4
+
+config CRC32_SARWATE
+ bool
+ default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1
+
+config CRC32_BIT
+ bool
+ default y if CRC32_IMPL_BIT
+
config CRC64
tristate "CRC64 functions"
help
}
#if CRC_LE_BITS == 1
-u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
}
-u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
}
#else
-u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE);
}
-u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
{
return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE);
}
#endif
-EXPORT_SYMBOL(crc32_le);
-EXPORT_SYMBOL(__crc32c_le);
-
-u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
EXPORT_SYMBOL(crc32_le_base);
-
-u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
EXPORT_SYMBOL(crc32c_le_base);
-u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
-
/*
* This multiplies the polynomials x and y modulo the given modulus.
* This follows the "little-endian" CRC convention that the lsbit
}
#if CRC_BE_BITS == 1
-u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
{
return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
}
#else
-u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len)
+u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
{
return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE);
}
#endif
-EXPORT_SYMBOL(crc32_be);
+EXPORT_SYMBOL(crc32_be_base);
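For context, the exports dropped in this last hunk are what made the old override scheme work: lib/crc32.c defined the entry points as __weak, so an architecture replaced them simply by linking a strong definition of the same name, roughly as in the sketch below (my_arch_crc32_le() is a hypothetical stand-in). The series replaces that implicit link-time selection with the explicit *_arch/*_base split and the inline dispatch added to <linux/crc32.h> above.

#include <linux/types.h>

/* Hypothetical arch-provided core routine, for illustration only. */
u32 my_arch_crc32_le(u32 crc, unsigned char const *p, size_t len);

/*
 * Old-style override (removed by this change): a strong definition of
 * crc32_le() in arch code silently won over the __weak generic one at
 * link time, with no Kconfig visibility of which version was in use.
 */
u32 crc32_le(u32 crc, unsigned char const *p, size_t len)
{
	return my_arch_crc32_le(crc, p, len);
}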