x86/crc32: expose CRC32 functions through lib

author Eric Biggers <ebiggers@google.com>

Mon, 2 Dec 2024 01:08:38 +0000 (17:08 -0800)

committer Eric Biggers <ebiggers@google.com>

Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
author Eric Biggers <ebiggers@google.com>
Mon, 2 Dec 2024 01:08:38 +0000 (17:08 -0800)
committer Eric Biggers <ebiggers@google.com>
Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig

index 9d7bd0ae48c4260f4abb6dbedc696e3915c230ea..4f4fc67fbce7a69ac630bc553eb99a9e6d0fd5d5 100644 (file)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -76,6 +76,7 @@ config X86
         select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
         select ARCH_HAS_CPU_FINALIZE_INIT
         select ARCH_HAS_CPU_PASID               if IOMMU_SVA
+       select ARCH_HAS_CRC32
         select ARCH_HAS_CURRENT_STACK_POINTER
         select ARCH_HAS_DEBUG_VIRTUAL
         select ARCH_HAS_DEBUG_VM_PGTABLE        if !X86_PAE
diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig

index 3d2e38ba524033940ff1124bd644827bc4fba21d..ba9a7e73cd0847a1c00e9724a6e7401aa8c5f6ba 100644 (file)
--- a/arch/x86/crypto/Kconfig
+++ b/arch/x86/crypto/Kconfig
@@ -492,28 +492,6 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL
           Architecture: x86_64 using:
           - CLMUL-NI (carry-less multiplication new instructions)
  
-config CRYPTO_CRC32C_INTEL
-       tristate "CRC32c (SSE4.2/PCLMULQDQ)"
-       depends on X86
-       select CRYPTO_HASH
-       help
-         CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
-
-         Architecture: x86 (32-bit and 64-bit) using:
-         - SSE4.2 (Streaming SIMD Extensions 4.2) CRC32 instruction
-         - PCLMULQDQ (carry-less multiplication)
-
-config CRYPTO_CRC32_PCLMUL
-       tristate "CRC32 (PCLMULQDQ)"
-       depends on X86
-       select CRYPTO_HASH
-       select CRC32
-       help
-         CRC32 CRC algorithm (IEEE 802.3)
-
-         Architecture: x86 (32-bit and 64-bit) using:
-         - PCLMULQDQ (carry-less multiplication)
-
  config CRYPTO_CRCT10DIF_PCLMUL
         tristate "CRCT10DIF (PCLMULQDQ)"
         depends on X86 && 64BIT && CRC_T10DIF
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile

index 53b4a277809e0804777ea0c9e60456e899f95783..030b925ca4e28ab65311d6f0c586faabc0557c88 100644 (file)
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -75,13 +75,6 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
  obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o
  polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o
  
-obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
-crc32c-intel-y := crc32c-intel_glue.o
-crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
-
-obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
-crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
-
  obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
  crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
  
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S

deleted file mode 100644 (file)

index f963778..0000000
--- a/arch/x86/crypto/crc32-pclmul_asm.S
+++ /dev/null
@@ -1,217 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
- * calculation.
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
- * PCLMULQDQ is a carry-less multiplication instruction with its own CPU
- * feature flag (it is not part of SSE4.2); the reference can be found at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2B: Instruction Set Reference, N-Z
- *
- * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
- *           Alexander Boyko <Alexander_Boyko@xyratex.com>
- */
-
-#include <linux/linkage.h>
-
-
-.section .rodata
-.align 16
-/*
- * [(x4*128+32 mod P(x) << 32)]' << 1   = 0x154442bd4
- * #define CONSTANT_R1  0x154442bd4LL
- *
- * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
- * #define CONSTANT_R2  0x1c6e41596LL
- */
-.Lconstant_R2R1:
-       .octa 0x00000001c6e415960000000154442bd4
-/*
- * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
- * #define CONSTANT_R3  0x1751997d0LL
- *
- * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
- * #define CONSTANT_R4  0x0ccaa009eLL
- */
-.Lconstant_R4R3:
-       .octa 0x00000000ccaa009e00000001751997d0
-/*
- * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
- * #define CONSTANT_R5  0x163cd6124LL
- */
-.Lconstant_R5:
-       .octa 0x00000000000000000000000163cd6124
-.Lconstant_mask32:
-       .octa 0x000000000000000000000000FFFFFFFF
-/*
- * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
- *
- * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
- * #define CONSTANT_RU  0x1F7011641LL
- */
-.Lconstant_RUpoly:
-       .octa 0x00000001F701164100000001DB710641
-
-#define CONSTANT %xmm0
-
-#ifdef __x86_64__
-#define CRC     %edi
-#define BUF     %rsi
-#define LEN     %rdx
-#else
-#define CRC     %eax
-#define BUF     %edx
-#define LEN     %ecx
-#endif
-
-
-
-.text
-/**
- *      Calculate crc32
- *      CRC - initial crc32
- *      BUF - buffer (16 bytes aligned)
- *      LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
- *      return %eax crc32
- *      u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
- */
-
-SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
-       movdqa  (BUF), %xmm1
-       movdqa  0x10(BUF), %xmm2
-       movdqa  0x20(BUF), %xmm3
-       movdqa  0x30(BUF), %xmm4
-       movd    CRC, CONSTANT
-       pxor    CONSTANT, %xmm1
-       sub     $0x40, LEN
-       add     $0x40, BUF
-       cmp     $0x40, LEN
-       jb      .Lless_64
-
-#ifdef __x86_64__
-       movdqa .Lconstant_R2R1(%rip), CONSTANT
-#else
-       movdqa .Lconstant_R2R1, CONSTANT
-#endif
-
-.Lloop_64:/*  64 bytes Full cache line folding */
-       prefetchnta    0x40(BUF)
-       movdqa  %xmm1, %xmm5
-       movdqa  %xmm2, %xmm6
-       movdqa  %xmm3, %xmm7
-#ifdef __x86_64__
-       movdqa  %xmm4, %xmm8
-#endif
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x00, CONSTANT, %xmm2
-       pclmulqdq $0x00, CONSTANT, %xmm3
-#ifdef __x86_64__
-       pclmulqdq $0x00, CONSTANT, %xmm4
-#endif
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pclmulqdq $0x11, CONSTANT, %xmm6
-       pclmulqdq $0x11, CONSTANT, %xmm7
-#ifdef __x86_64__
-       pclmulqdq $0x11, CONSTANT, %xmm8
-#endif
-       pxor    %xmm5, %xmm1
-       pxor    %xmm6, %xmm2
-       pxor    %xmm7, %xmm3
-#ifdef __x86_64__
-       pxor    %xmm8, %xmm4
-#else
-       /* xmm8 is not available in 32-bit mode, so reuse xmm5 as the temporary */
-       movdqa  %xmm4, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm4
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm4
-#endif
-
-       pxor    (BUF), %xmm1
-       pxor    0x10(BUF), %xmm2
-       pxor    0x20(BUF), %xmm3
-       pxor    0x30(BUF), %xmm4
-
-       sub     $0x40, LEN
-       add     $0x40, BUF
-       cmp     $0x40, LEN
-       jge     .Lloop_64
-.Lless_64:/*  Folding cache line into 128bit */
-#ifdef __x86_64__
-       movdqa  .Lconstant_R4R3(%rip), CONSTANT
-#else
-       movdqa  .Lconstant_R4R3, CONSTANT
-#endif
-       prefetchnta     (BUF)
-
-       movdqa  %xmm1, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm1
-       pxor    %xmm2, %xmm1
-
-       movdqa  %xmm1, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm1
-       pxor    %xmm3, %xmm1
-
-       movdqa  %xmm1, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm1
-       pxor    %xmm4, %xmm1
-
-       cmp     $0x10, LEN
-       jb      .Lfold_64
-.Lloop_16:/* Folding rest buffer into 128bit */
-       movdqa  %xmm1, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm1
-       pxor    (BUF), %xmm1
-       sub     $0x10, LEN
-       add     $0x10, BUF
-       cmp     $0x10, LEN
-       jge     .Lloop_16
-
-.Lfold_64:
-       /* perform the last 64 bit fold, also adds 32 zeroes
-        * to the input stream */
-       pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
-       psrldq  $0x08, %xmm1
-       pxor    CONSTANT, %xmm1
-
-       /* final 32-bit fold */
-       movdqa  %xmm1, %xmm2
-#ifdef __x86_64__
-       movdqa  .Lconstant_R5(%rip), CONSTANT
-       movdqa  .Lconstant_mask32(%rip), %xmm3
-#else
-       movdqa  .Lconstant_R5, CONSTANT
-       movdqa  .Lconstant_mask32, %xmm3
-#endif
-       psrldq  $0x04, %xmm2
-       pand    %xmm3, %xmm1
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pxor    %xmm2, %xmm1
-
-       /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
-#ifdef __x86_64__
-       movdqa  .Lconstant_RUpoly(%rip), CONSTANT
-#else
-       movdqa  .Lconstant_RUpoly, CONSTANT
-#endif
-       movdqa  %xmm1, %xmm2
-       pand    %xmm3, %xmm1
-       pclmulqdq $0x10, CONSTANT, %xmm1
-       pand    %xmm3, %xmm1
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pxor    %xmm2, %xmm1
-       pextrd  $0x01, %xmm1, %eax
-
-       RET
-SYM_FUNC_END(crc32_pclmul_le_16)
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c

deleted file mode 100644 (file)

index 9d14eac..0000000
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation.
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/crc32.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-
-#include <asm/cpufeatures.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
-
-#define CHKSUM_BLOCK_SIZE      1
-#define CHKSUM_DIGEST_SIZE     4
-
-#define PCLMUL_MIN_LEN         64L     /* minimum size of buffer
-                                        * for crc32_pclmul_le_16 */
-#define SCALE_F                        16L     /* size of xmm register */
-#define SCALE_F_MASK           (SCALE_F - 1)
-
-u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
-
-static u32 __attribute__((pure))
-       crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
-{
-       unsigned int iquotient;
-       unsigned int iremainder;
-       unsigned int prealign;
-
-       if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable())
-               return crc32_le(crc, p, len);
-
-       if ((long)p & SCALE_F_MASK) {
-               /* align p to 16 byte */
-               prealign = SCALE_F - ((long)p & SCALE_F_MASK);
-
-               crc = crc32_le(crc, p, prealign);
-               len -= prealign;
-               p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
-                                    ~SCALE_F_MASK);
-       }
-       iquotient = len & (~SCALE_F_MASK);
-       iremainder = len & SCALE_F_MASK;
-
-       kernel_fpu_begin();
-       crc = crc32_pclmul_le_16(crc, p, iquotient);
-       kernel_fpu_end();
-
-       if (iremainder)
-               crc = crc32_le(crc, p + iquotient, iremainder);
-
-       return crc;
-}
-
-static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
-{
-       u32 *key = crypto_tfm_ctx(tfm);
-
-       *key = 0;
-
-       return 0;
-}
-
-static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
-                       unsigned int keylen)
-{
-       u32 *mctx = crypto_shash_ctx(hash);
-
-       if (keylen != sizeof(u32))
-               return -EINVAL;
-       *mctx = le32_to_cpup((__le32 *)key);
-       return 0;
-}
-
-static int crc32_pclmul_init(struct shash_desc *desc)
-{
-       u32 *mctx = crypto_shash_ctx(desc->tfm);
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *crcp = *mctx;
-
-       return 0;
-}
-
-static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
-                              unsigned int len)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *crcp = crc32_pclmul_le(*crcp, data, len);
-       return 0;
-}
-
-/* No final XOR 0xFFFFFFFF, like crc32_le */
-static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
-                               u8 *out)
-{
-       *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
-       return 0;
-}
-
-static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
-                             unsigned int len, u8 *out)
-{
-       return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *(__le32 *)out = cpu_to_le32p(crcp);
-       return 0;
-}
-
-static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
-                              unsigned int len, u8 *out)
-{
-       return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
-                                   out);
-}
-
-static struct shash_alg alg = {
-       .setkey         = crc32_pclmul_setkey,
-       .init           = crc32_pclmul_init,
-       .update         = crc32_pclmul_update,
-       .final          = crc32_pclmul_final,
-       .finup          = crc32_pclmul_finup,
-       .digest         = crc32_pclmul_digest,
-       .descsize       = sizeof(u32),
-       .digestsize     = CHKSUM_DIGEST_SIZE,
-       .base           = {
-                       .cra_name               = "crc32",
-                       .cra_driver_name        = "crc32-pclmul",
-                       .cra_priority           = 200,
-                       .cra_flags              = CRYPTO_ALG_OPTIONAL_KEY,
-                       .cra_blocksize          = CHKSUM_BLOCK_SIZE,
-                       .cra_ctxsize            = sizeof(u32),
-                       .cra_module             = THIS_MODULE,
-                       .cra_init               = crc32_pclmul_cra_init,
-       }
-};
-
-static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
-       X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
-       {}
-};
-MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
-
-
-static int __init crc32_pclmul_mod_init(void)
-{
-
-       if (!x86_match_cpu(crc32pclmul_cpu_id)) {
-               pr_info("PCLMULQDQ-NI instructions are not detected.\n");
-               return -ENODEV;
-       }
-       return crypto_register_shash(&alg);
-}
-
-static void __exit crc32_pclmul_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_init(crc32_pclmul_mod_init);
-module_exit(crc32_pclmul_mod_fini);
-
-MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
-MODULE_DESCRIPTION("CRC32 algorithm (IEEE 802.3) accelerated with PCLMULQDQ");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32");
-MODULE_ALIAS_CRYPTO("crc32-pclmul");
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c

deleted file mode 100644 (file)

index 603d159..0000000
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ /dev/null
@@ -1,249 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Using the hardware-provided CRC32 instruction to accelerate CRC32C computation.
- * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
- * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2A: Instruction Set Reference, A-M
- *
- * Copyright (C) 2008 Intel Corporation
- * Authors: Austin Zhang <austin_zhang@linux.intel.com>
- *          Kent Liu <kent.liu@intel.com>
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-
-#include <asm/cpufeatures.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
-
-#define CHKSUM_BLOCK_SIZE      1
-#define CHKSUM_DIGEST_SIZE     4
-
-#define SCALE_F        sizeof(unsigned long)
-
-#ifdef CONFIG_X86_64
-#define CRC32_INST "crc32q %1, %q0"
-#else
-#define CRC32_INST "crc32l %1, %0"
-#endif
-
-#ifdef CONFIG_X86_64
-/*
- * Use the carryless-multiply version of crc32c only when the
- * buffer size is >= 512 bytes, so that the speedup outweighs
- * the FPU state save/restore overhead.
- */
-#define CRC32C_PCL_BREAKEVEN   512
-
-asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
-#endif /* CONFIG_X86_64 */
-
-static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
-{
-       while (length--) {
-               asm("crc32b %1, %0"
-                   : "+r" (crc) : "rm" (*data));
-               data++;
-       }
-
-       return crc;
-}
-
-static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
-{
-       unsigned int iquotient = len / SCALE_F;
-       unsigned int iremainder = len % SCALE_F;
-       unsigned long *ptmp = (unsigned long *)p;
-
-       while (iquotient--) {
-               asm(CRC32_INST
-                   : "+r" (crc) : "rm" (*ptmp));
-               ptmp++;
-       }
-
-       if (iremainder)
-               crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
-                                iremainder);
-
-       return crc;
-}
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set
- * the seed.
- */
-static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
-                       unsigned int keylen)
-{
-       u32 *mctx = crypto_shash_ctx(hash);
-
-       if (keylen != sizeof(u32))
-               return -EINVAL;
-       *mctx = le32_to_cpup((__le32 *)key);
-       return 0;
-}
-
-static int crc32c_intel_init(struct shash_desc *desc)
-{
-       u32 *mctx = crypto_shash_ctx(desc->tfm);
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *crcp = *mctx;
-
-       return 0;
-}
-
-static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
-                              unsigned int len)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *crcp = crc32c_intel_le_hw(*crcp, data, len);
-       return 0;
-}
-
-static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
-                               u8 *out)
-{
-       *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
-       return 0;
-}
-
-static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
-                             unsigned int len, u8 *out)
-{
-       return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *(__le32 *)out = ~cpu_to_le32p(crcp);
-       return 0;
-}
-
-static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
-                              unsigned int len, u8 *out)
-{
-       return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
-                                   out);
-}
-
-static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
-{
-       u32 *key = crypto_tfm_ctx(tfm);
-
-       *key = ~0;
-
-       return 0;
-}
-
-#ifdef CONFIG_X86_64
-static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
-                              unsigned int len)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       /*
-        * use faster PCL version if datasize is large enough to
-        * overcome kernel fpu state save/restore overhead
-        */
-       if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
-               kernel_fpu_begin();
-               *crcp = crc32c_x86_3way(*crcp, data, len);
-               kernel_fpu_end();
-       } else
-               *crcp = crc32c_intel_le_hw(*crcp, data, len);
-       return 0;
-}
-
-static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
-                               u8 *out)
-{
-       if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
-               kernel_fpu_begin();
-               *(__le32 *)out = ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len));
-               kernel_fpu_end();
-       } else
-               *(__le32 *)out =
-                       ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
-       return 0;
-}
-
-static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
-                             unsigned int len, u8 *out)
-{
-       return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
-                              unsigned int len, u8 *out)
-{
-       return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
-                                   out);
-}
-#endif /* CONFIG_X86_64 */
-
-static struct shash_alg alg = {
-       .setkey                 =       crc32c_intel_setkey,
-       .init                   =       crc32c_intel_init,
-       .update                 =       crc32c_intel_update,
-       .final                  =       crc32c_intel_final,
-       .finup                  =       crc32c_intel_finup,
-       .digest                 =       crc32c_intel_digest,
-       .descsize               =       sizeof(u32),
-       .digestsize             =       CHKSUM_DIGEST_SIZE,
-       .base                   =       {
-               .cra_name               =       "crc32c",
-               .cra_driver_name        =       "crc32c-intel",
-               .cra_priority           =       200,
-               .cra_flags              =       CRYPTO_ALG_OPTIONAL_KEY,
-               .cra_blocksize          =       CHKSUM_BLOCK_SIZE,
-               .cra_ctxsize            =       sizeof(u32),
-               .cra_module             =       THIS_MODULE,
-               .cra_init               =       crc32c_intel_cra_init,
-       }
-};
-
-static const struct x86_cpu_id crc32c_cpu_id[] = {
-       X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
-       {}
-};
-MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
-
-static int __init crc32c_intel_mod_init(void)
-{
-       if (!x86_match_cpu(crc32c_cpu_id))
-               return -ENODEV;
-#ifdef CONFIG_X86_64
-       if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
-               alg.update = crc32c_pcl_intel_update;
-               alg.finup = crc32c_pcl_intel_finup;
-               alg.digest = crc32c_pcl_intel_digest;
-       }
-#endif
-       return crypto_register_shash(&alg);
-}
-
-static void __exit crc32c_intel_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_init(crc32c_intel_mod_init);
-module_exit(crc32c_intel_mod_fini);
-
-MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
-MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32c");
-MODULE_ALIAS_CRYPTO("crc32c-intel");
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S

deleted file mode 100644 (file)

index 9b87705..0000000
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
- *
- * The white papers on CRC32C calculations with PCLMULQDQ instruction can be
- * downloaded from:
- * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
- * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
- *
- * Copyright (C) 2012 Intel Corporation.
- * Copyright 2024 Google LLC
- *
- * Authors:
- *     Wajdi Feghali <wajdi.k.feghali@intel.com>
- *     James Guilford <james.guilford@intel.com>
- *     David Cote <david.m.cote@intel.com>
- *     Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/linkage.h>
-
-## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
-
-# Define threshold below which buffers are considered "small" and routed to
-# regular CRC code that does not interleave the CRC instructions.
-#define SMALL_SIZE 200
-
-# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
-
-.text
-SYM_FUNC_START(crc32c_x86_3way)
-#define    crc0                  %edi
-#define    crc0_q        %rdi
-#define    bufp                  %rsi
-#define    bufp_d        %esi
-#define    len           %rdx
-#define    len_dw        %edx
-#define    n_misaligned          %ecx /* overlaps chunk_bytes! */
-#define    n_misaligned_q %rcx
-#define    chunk_bytes   %ecx /* overlaps n_misaligned! */
-#define    chunk_bytes_q  %rcx
-#define    crc1                  %r8
-#define    crc2                  %r9
-
-       cmp     $SMALL_SIZE, len
-       jb      .Lsmall
-
-       ################################################################
-       ## 1) ALIGN:
-       ################################################################
-       mov     bufp_d, n_misaligned
-       neg     n_misaligned
-       and     $7, n_misaligned        # calculate the misalignment amount of
-                                       # the address
-       je      .Laligned               # Skip if aligned
-
-       # Process 1 <= n_misaligned <= 7 bytes individually in order to align
-       # the remaining data to an 8-byte boundary.
-.Ldo_align:
-       movq    (bufp), %rax
-       add     n_misaligned_q, bufp
-       sub     n_misaligned_q, len
-.Lalign_loop:
-       crc32b  %al, crc0               # compute crc32 of 1-byte
-       shr     $8, %rax                # get next byte
-       dec     n_misaligned
-       jne     .Lalign_loop
-.Laligned:
-
-       ################################################################
-       ## 2) PROCESS BLOCK:
-       ################################################################
-
-       cmp     $128*24, len
-       jae     .Lfull_block
-
-.Lpartial_block:
-       # Compute floor(len / 24) to get num qwords to process from each lane.
-       imul    $2731, len_dw, %eax     # 2731 = ceil(2^16 / 24)
-       shr     $16, %eax
-       jmp     .Lcrc_3lanes
-
-.Lfull_block:
-       # Processing 128 qwords from each lane.
-       mov     $128, %eax
-
-       ################################################################
-       ## 3) CRC each of three lanes:
-       ################################################################
-
-.Lcrc_3lanes:
-       xor     crc1,crc1
-       xor     crc2,crc2
-       mov     %eax, chunk_bytes
-       shl     $3, chunk_bytes         # num bytes to process from each lane
-       sub     $5, %eax                # 4 for 4x_loop, 1 for special last iter
-       jl      .Lcrc_3lanes_4x_done
-
-       # Unroll the loop by a factor of 4 to reduce the overhead of the loop
-       # bookkeeping instructions, which can compete with crc32q for the ALUs.
-.Lcrc_3lanes_4x_loop:
-       crc32q  (bufp), crc0_q
-       crc32q  (bufp,chunk_bytes_q), crc1
-       crc32q  (bufp,chunk_bytes_q,2), crc2
-       crc32q  8(bufp), crc0_q
-       crc32q  8(bufp,chunk_bytes_q), crc1
-       crc32q  8(bufp,chunk_bytes_q,2), crc2
-       crc32q  16(bufp), crc0_q
-       crc32q  16(bufp,chunk_bytes_q), crc1
-       crc32q  16(bufp,chunk_bytes_q,2), crc2
-       crc32q  24(bufp), crc0_q
-       crc32q  24(bufp,chunk_bytes_q), crc1
-       crc32q  24(bufp,chunk_bytes_q,2), crc2
-       add     $32, bufp
-       sub     $4, %eax
-       jge     .Lcrc_3lanes_4x_loop
-
-.Lcrc_3lanes_4x_done:
-       add     $4, %eax
-       jz      .Lcrc_3lanes_last_qword
-
-.Lcrc_3lanes_1x_loop:
-       crc32q  (bufp), crc0_q
-       crc32q  (bufp,chunk_bytes_q), crc1
-       crc32q  (bufp,chunk_bytes_q,2), crc2
-       add     $8, bufp
-       dec     %eax
-       jnz     .Lcrc_3lanes_1x_loop
-
-.Lcrc_3lanes_last_qword:
-       crc32q  (bufp), crc0_q
-       crc32q  (bufp,chunk_bytes_q), crc1
-# SKIP  crc32q (bufp,chunk_bytes_q,2), crc2    ; Don't do this one yet
-
-       ################################################################
-       ## 4) Combine three results:
-       ################################################################
-
-       lea     (K_table-8)(%rip), %rax         # first entry is for idx 1
-       pmovzxdq (%rax,chunk_bytes_q), %xmm0    # 2 consts: K1:K2
-       lea     (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
-       sub     %rax, len                       # len -= chunk_bytes * 3
-
-       movq    crc0_q, %xmm1                   # CRC for block 1
-       pclmulqdq $0x00, %xmm0, %xmm1           # Multiply by K2
-
-       movq    crc1, %xmm2                     # CRC for block 2
-       pclmulqdq $0x10, %xmm0, %xmm2           # Multiply by K1
-
-       pxor    %xmm2,%xmm1
-       movq    %xmm1, %rax
-       xor     (bufp,chunk_bytes_q,2), %rax
-       mov     crc2, crc0_q
-       crc32   %rax, crc0_q
-       lea     8(bufp,chunk_bytes_q,2), bufp
-
-       ################################################################
-       ## 5) If more blocks remain, goto (2):
-       ################################################################
-
-       cmp     $128*24, len
-       jae     .Lfull_block
-       cmp     $SMALL_SIZE, len
-       jae     .Lpartial_block
-
-       #######################################################################
-       ## 6) Process any remainder without interleaving:
-       #######################################################################
-.Lsmall:
-       test    len_dw, len_dw
-       jz      .Ldone
-       mov     len_dw, %eax
-       shr     $3, %eax
-       jz      .Ldo_dword
-.Ldo_qwords:
-       crc32q  (bufp), crc0_q
-       add     $8, bufp
-       dec     %eax
-       jnz     .Ldo_qwords
-.Ldo_dword:
-       test    $4, len_dw
-       jz      .Ldo_word
-       crc32l  (bufp), crc0
-       add     $4, bufp
-.Ldo_word:
-       test    $2, len_dw
-       jz      .Ldo_byte
-       crc32w  (bufp), crc0
-       add     $2, bufp
-.Ldo_byte:
-       test    $1, len_dw
-       jz      .Ldone
-       crc32b  (bufp), crc0
-.Ldone:
-       mov     crc0, %eax
-        RET
-SYM_FUNC_END(crc32c_x86_3way)
-
-.section       .rodata, "a", @progbits
-       ################################################################
-       ## PCLMULQDQ tables
-       ## Table is 128 entries x 2 words (8 bytes) each
-       ################################################################
-.align 8
-K_table:
-       .long 0x493c7d27, 0x00000001
-       .long 0xba4fc28e, 0x493c7d27
-       .long 0xddc0152b, 0xf20c0dfe
-       .long 0x9e4addf8, 0xba4fc28e
-       .long 0x39d3b296, 0x3da6d0cb
-       .long 0x0715ce53, 0xddc0152b
-       .long 0x47db8317, 0x1c291d04
-       .long 0x0d3b6092, 0x9e4addf8
-       .long 0xc96cfdc0, 0x740eef02
-       .long 0x878a92a7, 0x39d3b296
-       .long 0xdaece73e, 0x083a6eec
-       .long 0xab7aff2a, 0x0715ce53
-       .long 0x2162d385, 0xc49f4f67
-       .long 0x83348832, 0x47db8317
-       .long 0x299847d5, 0x2ad91c30
-       .long 0xb9e02b86, 0x0d3b6092
-       .long 0x18b33a4e, 0x6992cea2
-       .long 0xb6dd949b, 0xc96cfdc0
-       .long 0x78d9ccb7, 0x7e908048
-       .long 0xbac2fd7b, 0x878a92a7
-       .long 0xa60ce07b, 0x1b3d8f29
-       .long 0xce7f39f4, 0xdaece73e
-       .long 0x61d82e56, 0xf1d0f55e
-       .long 0xd270f1a2, 0xab7aff2a
-       .long 0xc619809d, 0xa87ab8a8
-       .long 0x2b3cac5d, 0x2162d385
-       .long 0x65863b64, 0x8462d800
-       .long 0x1b03397f, 0x83348832
-       .long 0xebb883bd, 0x71d111a8
-       .long 0xb3e32c28, 0x299847d5
-       .long 0x064f7f26, 0xffd852c6
-       .long 0xdd7e3b0c, 0xb9e02b86
-       .long 0xf285651c, 0xdcb17aa4
-       .long 0x10746f3c, 0x18b33a4e
-       .long 0xc7a68855, 0xf37c5aee
-       .long 0x271d9844, 0xb6dd949b
-       .long 0x8e766a0c, 0x6051d5a2
-       .long 0x93a5f730, 0x78d9ccb7
-       .long 0x6cb08e5c, 0x18b0d4ff
-       .long 0x6b749fb2, 0xbac2fd7b
-       .long 0x1393e203, 0x21f3d99c
-       .long 0xcec3662e, 0xa60ce07b
-       .long 0x96c515bb, 0x8f158014
-       .long 0xe6fc4e6a, 0xce7f39f4
-       .long 0x8227bb8a, 0xa00457f7
-       .long 0xb0cd4768, 0x61d82e56
-       .long 0x39c7ff35, 0x8d6d2c43
-       .long 0xd7a4825c, 0xd270f1a2
-       .long 0x0ab3844b, 0x00ac29cf
-       .long 0x0167d312, 0xc619809d
-       .long 0xf6076544, 0xe9adf796
-       .long 0x26f6a60a, 0x2b3cac5d
-       .long 0xa741c1bf, 0x96638b34
-       .long 0x98d8d9cb, 0x65863b64
-       .long 0x49c3cc9c, 0xe0e9f351
-       .long 0x68bce87a, 0x1b03397f
-       .long 0x57a3d037, 0x9af01f2d
-       .long 0x6956fc3b, 0xebb883bd
-       .long 0x42d98888, 0x2cff42cf
-       .long 0x3771e98f, 0xb3e32c28
-       .long 0xb42ae3d9, 0x88f25a3a
-       .long 0x2178513a, 0x064f7f26
-       .long 0xe0ac139e, 0x4e36f0b0
-       .long 0x170076fa, 0xdd7e3b0c
-       .long 0x444dd413, 0xbd6f81f8
-       .long 0x6f345e45, 0xf285651c
-       .long 0x41d17b64, 0x91c9bd4b
-       .long 0xff0dba97, 0x10746f3c
-       .long 0xa2b73df1, 0x885f087b
-       .long 0xf872e54c, 0xc7a68855
-       .long 0x1e41e9fc, 0x4c144932
-       .long 0x86d8e4d2, 0x271d9844
-       .long 0x651bd98b, 0x52148f02
-       .long 0x5bb8f1bc, 0x8e766a0c
-       .long 0xa90fd27a, 0xa3c6f37a
-       .long 0xb3af077a, 0x93a5f730
-       .long 0x4984d782, 0xd7c0557f
-       .long 0xca6ef3ac, 0x6cb08e5c
-       .long 0x234e0b26, 0x63ded06a
-       .long 0xdd66cbbb, 0x6b749fb2
-       .long 0x4597456a, 0x4d56973c
-       .long 0xe9e28eb4, 0x1393e203
-       .long 0x7b3ff57a, 0x9669c9df
-       .long 0xc9c8b782, 0xcec3662e
-       .long 0x3f70cc6f, 0xe417f38a
-       .long 0x93e106a4, 0x96c515bb
-       .long 0x62ec6c6d, 0x4b9e0f71
-       .long 0xd813b325, 0xe6fc4e6a
-       .long 0x0df04680, 0xd104b8fc
-       .long 0x2342001e, 0x8227bb8a
-       .long 0x0a2a8d7e, 0x5b397730
-       .long 0x6d9a4957, 0xb0cd4768
-       .long 0xe8b6368b, 0xe78eb416
-       .long 0xd2c3ed1a, 0x39c7ff35
-       .long 0x995a5724, 0x61ff0e01
-       .long 0x9ef68d35, 0xd7a4825c
-       .long 0x0c139b31, 0x8d96551c
-       .long 0xf2271e60, 0x0ab3844b
-       .long 0x0b0bf8ca, 0x0bf80dd2
-       .long 0x2664fd8b, 0x0167d312
-       .long 0xed64812d, 0x8821abed
-       .long 0x02ee03b2, 0xf6076544
-       .long 0x8604ae0f, 0x6a45d2b2
-       .long 0x363bd6b3, 0x26f6a60a
-       .long 0x135c83fd, 0xd8d26619
-       .long 0x5fabe670, 0xa741c1bf
-       .long 0x35ec3279, 0xde87806c
-       .long 0x00bcf5f6, 0x98d8d9cb
-       .long 0x8ae00689, 0x14338754
-       .long 0x17f27698, 0x49c3cc9c
-       .long 0x58ca5f00, 0x5bd2011f
-       .long 0xaa7c7ad5, 0x68bce87a
-       .long 0xb5cfca28, 0xdd07448e
-       .long 0xded288f8, 0x57a3d037
-       .long 0x59f229bc, 0xdde8f5b9
-       .long 0x6d390dec, 0x6956fc3b
-       .long 0x37170390, 0xa3e3e02c
-       .long 0x6353c1cc, 0x42d98888
-       .long 0xc4584f5c, 0xd73c7bea
-       .long 0xf48642e9, 0x3771e98f
-       .long 0x531377e2, 0x80ff0093
-       .long 0xdd35bc8d, 0xb42ae3d9
-       .long 0xb25b29f2, 0x8fe4c34d
-       .long 0x9a5ede41, 0x2178513a
-       .long 0xa563905d, 0xdf99fc11
-       .long 0x45cddf4e, 0xe0ac139e
-       .long 0xacfa3103, 0x6c23e841
-       .long 0xa51b6135, 0x170076fa
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile

index 98583a9dbab337e09a2e58905e5200499a496a07..17510da06c9f94aaacffcc7a046d95171ed24759 100644 (file)
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -38,6 +38,10 @@ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
  lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
  lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o
  
+obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o
+crc32-x86-y := crc32-glue.o crc32-pclmul.o
+crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o
+
  obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
  obj-y += iomem.o
  
diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c

new file mode 100644 (file)

index 0000000..2dd18a8
--- /dev/null
+++ b/arch/x86/lib/crc32-glue.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * x86-optimized CRC32 functions
+ *
+ * Copyright (C) 2008 Intel Corporation
+ * Copyright 2012 Xyratex Technology Limited
+ * Copyright 2024 Google LLC
+ */
+
+#include <asm/cpufeatures.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <linux/crc32.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+/* minimum size of buffer for crc32_pclmul_le_16 */
+#define CRC32_PCLMUL_MIN_LEN   64
+
+static DEFINE_STATIC_KEY_FALSE(have_crc32);
+static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
+
+u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
+
+/*
+ * Little-endian CRC32 of @len bytes at @p, continuing from @crc.
+ *
+ * When the buffer is long enough, PCLMULQDQ is available and SIMD may be used
+ * in the current context, the 16-byte-aligned middle of the buffer is handed
+ * to crc32_pclmul_le_16(); everything else goes through crc32_le_base().
+ */
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+       /*
+        * Up to 15 bytes may be consumed to reach 16-byte alignment, so asking
+        * for CRC32_PCLMUL_MIN_LEN + 15 guarantees that at least
+        * CRC32_PCLMUL_MIN_LEN bytes are left for the accelerated routine.
+        */
+       if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
+           static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
+               const size_t head = -(uintptr_t)p & 15;
+               size_t body;
+
+               /* Unaligned head: generic code. */
+               if (head != 0) {
+                       crc = crc32_le_base(crc, p, head);
+                       p += head;
+                       len -= head;
+               }
+
+               /* Largest remaining multiple of 16 bytes: PCLMULQDQ code. */
+               body = round_down(len, 16);
+               kernel_fpu_begin();
+               crc = crc32_pclmul_le_16(crc, p, body);
+               kernel_fpu_end();
+               p += body;
+               len -= body;
+       }
+
+       /* Tail, or the whole buffer if the fast path was not taken. */
+       return len ? crc32_le_base(crc, p, len) : crc;
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+#ifdef CONFIG_X86_64
+#define CRC32_INST "crc32q %1, %q0"
+#else
+#define CRC32_INST "crc32l %1, %0"
+#endif
+
+/*
+ * Use carryless multiply version of crc32c when buffer size is >= 512 to
+ * account for FPU state save/restore overhead.
+ */
+#define CRC32C_PCLMUL_BREAKEVEN        512
+
+asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
+
+/*
+ * CRC32C of @len bytes at @p, continuing from @crc.
+ *
+ * Without the have_crc32 key the generic crc32c_le_base() is used.  On 64-bit
+ * kernels, buffers of at least CRC32C_PCLMUL_BREAKEVEN bytes go to the 3-way
+ * interleaved routine when PCLMULQDQ is present and SIMD is usable.  All other
+ * cases use the crc32 instruction directly, which needs no FPU section.
+ */
+u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+       size_t remaining;
+
+       if (!static_branch_likely(&have_crc32))
+               return crc32c_le_base(crc, p, len);
+
+       if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
+           static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
+               kernel_fpu_begin();
+               crc = crc32c_x86_3way(crc, p, len);
+               kernel_fpu_end();
+               return crc;
+       }
+
+       /* Consume one native word per crc32 instruction ... */
+       remaining = len / sizeof(unsigned long);
+       while (remaining != 0) {
+               asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
+               p += sizeof(unsigned long);
+               remaining--;
+       }
+
+       /* ... then the leftover bytes one at a time. */
+       remaining = len % sizeof(unsigned long);
+       while (remaining != 0) {
+               asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
+               p++;
+               remaining--;
+       }
+
+       return crc;
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+       return crc32_be_base(crc, p, len);      /* plain forward to the generic big-endian code */
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+static int __init crc32_x86_init(void)
+{
+       if (boot_cpu_has(X86_FEATURE_XMM4_2))
+               static_branch_enable(&have_crc32);      /* unlocks the crc32-instruction paths in crc32c_le_arch() */
+       if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
+               static_branch_enable(&have_pclmulqdq);  /* unlocks crc32_pclmul_le_16() and crc32c_x86_3way() */
+       return 0;
+}
+arch_initcall(crc32_x86_init);
+
+static void __exit crc32_x86_exit(void)        /* intentionally empty: init only flips static keys, nothing to release */
+{
+}
+module_exit(crc32_x86_exit);
+
+/*
+ * Report which CRC32 variants have an accelerated path on this CPU, as a
+ * bitmask of CRC32C_OPTIMIZATION and CRC32_LE_OPTIMIZATION.  The big-endian
+ * variant is never reported because crc32_be_arch() only calls generic code.
+ */
+u32 crc32_optimizations(void)
+{
+       const u32 crc32c_flag =
+               static_key_enabled(&have_crc32) ? CRC32C_OPTIMIZATION : 0;
+       const u32 crc32_le_flag =
+               static_key_enabled(&have_pclmulqdq) ? CRC32_LE_OPTIMIZATION : 0;
+
+       return crc32c_flag | crc32_le_flag;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_DESCRIPTION("x86-optimized CRC32 functions");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/lib/crc32-pclmul.S b/arch/x86/lib/crc32-pclmul.S

new file mode 100644 (file)

index 0000000..f963778
--- /dev/null
+++ b/arch/x86/lib/crc32-pclmul.S
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
+ * calculation.
+ * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
+ * PCLMULQDQ is a carry-less multiplication instruction with its own CPU
+ * feature flag (it is not part of SSE4.2); the reference can be found
+ * at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2B: Instruction Set Reference, N-Z
+ *
+ * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
+ *           Alexander Boyko <Alexander_Boyko@xyratex.com>
+ */
+
+#include <linux/linkage.h>
+
+
+.section .rodata
+.align 16
+/*
+ * [(x4*128+32 mod P(x) << 32)]' << 1   = 0x154442bd4
+ * #define CONSTANT_R1  0x154442bd4LL
+ *
+ * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
+ * #define CONSTANT_R2  0x1c6e41596LL
+ */
+.Lconstant_R2R1:
+       .octa 0x00000001c6e415960000000154442bd4
+/*
+ * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
+ * #define CONSTANT_R3  0x1751997d0LL
+ *
+ * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
+ * #define CONSTANT_R4  0x0ccaa009eLL
+ */
+.Lconstant_R4R3:
+       .octa 0x00000000ccaa009e00000001751997d0
+/*
+ * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
+ * #define CONSTANT_R5  0x163cd6124LL
+ */
+.Lconstant_R5:
+       .octa 0x00000000000000000000000163cd6124
+.Lconstant_mask32:
+       .octa 0x000000000000000000000000FFFFFFFF
+/*
+ * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
+ *
+ * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
+ * #define CONSTANT_RU  0x1F7011641LL
+ */
+.Lconstant_RUpoly:
+       .octa 0x00000001F701164100000001DB710641
+
+#define CONSTANT %xmm0
+
+#ifdef __x86_64__
+#define CRC     %edi
+#define BUF     %rsi
+#define LEN     %rdx
+#else
+#define CRC     %eax
+#define BUF     %edx
+#define LEN     %ecx
+#endif
+
+
+
+.text
+/**
+ *      Calculate crc32
+ *      CRC - initial crc32
+ *      BUF - buffer, must be 16-byte aligned (it is read with movdqa)
+ *      LEN - buffer size in bytes, must be a multiple of 16 and at least 64
+ *      return %eax crc32
+ *      u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
+ *
+ *      LEN is an unsigned size_t, so all length checks below use the
+ *      unsigned condition codes (jb/jae).
+ *      Clobbers %xmm0-%xmm7 (plus %xmm8 on x86_64), BUF, LEN and the flags;
+ *      the caller must make the SIMD registers usable before calling.
+ */
+
+SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
+       /* Load the first 64 bytes and mix the incoming CRC into them. */
+       movdqa  (BUF), %xmm1
+       movdqa  0x10(BUF), %xmm2
+       movdqa  0x20(BUF), %xmm3
+       movdqa  0x30(BUF), %xmm4
+       movd    CRC, CONSTANT
+       pxor    CONSTANT, %xmm1
+       sub     $0x40, LEN
+       add     $0x40, BUF
+       cmp     $0x40, LEN
+       jb      .Lless_64               /* fewer than 64 bytes remain */
+
+#ifdef __x86_64__
+       movdqa .Lconstant_R2R1(%rip), CONSTANT
+#else
+       movdqa .Lconstant_R2R1, CONSTANT
+#endif
+
+.Lloop_64:/*  64 bytes Full cache line folding */
+       prefetchnta    0x40(BUF)
+       movdqa  %xmm1, %xmm5
+       movdqa  %xmm2, %xmm6
+       movdqa  %xmm3, %xmm7
+#ifdef __x86_64__
+       movdqa  %xmm4, %xmm8
+#endif
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x00, CONSTANT, %xmm2
+       pclmulqdq $0x00, CONSTANT, %xmm3
+#ifdef __x86_64__
+       pclmulqdq $0x00, CONSTANT, %xmm4
+#endif
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pclmulqdq $0x11, CONSTANT, %xmm6
+       pclmulqdq $0x11, CONSTANT, %xmm7
+#ifdef __x86_64__
+       pclmulqdq $0x11, CONSTANT, %xmm8
+#endif
+       pxor    %xmm5, %xmm1
+       pxor    %xmm6, %xmm2
+       pxor    %xmm7, %xmm3
+#ifdef __x86_64__
+       pxor    %xmm8, %xmm4
+#else
+       /*
+        * %xmm8 does not exist in 32-bit mode: fold the fourth lane here,
+        * reusing %xmm5 now that the first lane is finished with it.
+        */
+       movdqa  %xmm4, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm4
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm4
+#endif
+
+       /* Add the next 64 bytes of input to the four folded lanes. */
+       pxor    (BUF), %xmm1
+       pxor    0x10(BUF), %xmm2
+       pxor    0x20(BUF), %xmm3
+       pxor    0x30(BUF), %xmm4
+
+       sub     $0x40, LEN
+       add     $0x40, BUF
+       cmp     $0x40, LEN
+       jae     .Lloop_64               /* unsigned: LEN is a size_t */
+.Lless_64:/*  Folding cache line into 128bit */
+#ifdef __x86_64__
+       movdqa  .Lconstant_R4R3(%rip), CONSTANT
+#else
+       movdqa  .Lconstant_R4R3, CONSTANT
+#endif
+       prefetchnta     (BUF)
+
+       /* Fold %xmm1 into %xmm2 ... */
+       movdqa  %xmm1, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    %xmm2, %xmm1
+
+       /* ... the result into %xmm3 ... */
+       movdqa  %xmm1, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    %xmm3, %xmm1
+
+       /* ... and that into %xmm4, leaving one 128-bit value in %xmm1. */
+       movdqa  %xmm1, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    %xmm4, %xmm1
+
+       cmp     $0x10, LEN
+       jb      .Lfold_64
+.Lloop_16:/* Folding rest buffer into 128bit */
+       movdqa  %xmm1, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    (BUF), %xmm1
+       sub     $0x10, LEN
+       add     $0x10, BUF
+       cmp     $0x10, LEN
+       jae     .Lloop_16               /* unsigned: LEN is a size_t */
+
+.Lfold_64:
+       /* perform the last 64 bit fold, also adds 32 zeroes
+        * to the input stream */
+       pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
+       psrldq  $0x08, %xmm1
+       pxor    CONSTANT, %xmm1
+
+       /* final 32-bit fold */
+       movdqa  %xmm1, %xmm2
+#ifdef __x86_64__
+       movdqa  .Lconstant_R5(%rip), CONSTANT
+       movdqa  .Lconstant_mask32(%rip), %xmm3
+#else
+       movdqa  .Lconstant_R5, CONSTANT
+       movdqa  .Lconstant_mask32, %xmm3
+#endif
+       psrldq  $0x04, %xmm2
+       pand    %xmm3, %xmm1
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pxor    %xmm2, %xmm1
+
+       /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
+#ifdef __x86_64__
+       movdqa  .Lconstant_RUpoly(%rip), CONSTANT
+#else
+       movdqa  .Lconstant_RUpoly, CONSTANT
+#endif
+       movdqa  %xmm1, %xmm2
+       pand    %xmm3, %xmm1
+       pclmulqdq $0x10, CONSTANT, %xmm1
+       pand    %xmm3, %xmm1
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pxor    %xmm2, %xmm1
+       pextrd  $0x01, %xmm1, %eax      /* the CRC is dword 1 of the result */
+
+       RET
+SYM_FUNC_END(crc32_pclmul_le_16)
diff --git a/arch/x86/lib/crc32c-3way.S b/arch/x86/lib/crc32c-3way.S

new file mode 100644 (file)

index 0000000..9b87705
--- /dev/null
+++ b/arch/x86/lib/crc32c-3way.S
@@ -0,0 +1,360 @@
+/*
+ * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
+ *
+ * The white papers on CRC32C calculations with PCLMULQDQ instruction can be
+ * downloaded from:
+ * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
+ * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
+ *
+ * Copyright (C) 2012 Intel Corporation.
+ * Copyright 2024 Google LLC
+ *
+ * Authors:
+ *     Wajdi Feghali <wajdi.k.feghali@intel.com>
+ *     James Guilford <james.guilford@intel.com>
+ *     David Cote <david.m.cote@intel.com>
+ *     Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/linkage.h>
+
+## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
+
+# Define threshold below which buffers are considered "small" and routed to
+# regular CRC code that does not interleave the CRC instructions.
+#define SMALL_SIZE 200
+
+# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
+
+.text
+SYM_FUNC_START(crc32c_x86_3way)
+#define    crc0                  %edi /* arg 1: running CRC, also lane 0 */
+#define    crc0_q        %rdi
+#define    bufp                  %rsi /* arg 2: current read position */
+#define    bufp_d        %esi
+#define    len           %rdx /* arg 3: bytes not yet accounted for */
+#define    len_dw        %edx
+#define    n_misaligned          %ecx /* overlaps chunk_bytes! */
+#define    n_misaligned_q %rcx
+#define    chunk_bytes   %ecx /* overlaps n_misaligned! */
+#define    chunk_bytes_q  %rcx
+#define    crc1                  %r8 /* lane 1 accumulator */
+#define    crc2                  %r9 /* lane 2 accumulator */
+
+       cmp     $SMALL_SIZE, len
+       jb      .Lsmall                 # too short to be worth interleaving
+
+       ################################################################
+       ## 1) ALIGN:
+       ################################################################
+       mov     bufp_d, n_misaligned
+       neg     n_misaligned
+       and     $7, n_misaligned        # calculate the misalignment amount of
+                                       # the address
+       je      .Laligned               # Skip if aligned
+
+       # Process 1 <= n_misaligned <= 7 bytes individually in order to align
+       # the remaining data to an 8-byte boundary.
+.Ldo_align:
+       movq    (bufp), %rax            # 8-byte load is in bounds: len >= SMALL_SIZE here
+       add     n_misaligned_q, bufp
+       sub     n_misaligned_q, len
+.Lalign_loop:
+       crc32b  %al, crc0               # compute crc32 of 1-byte
+       shr     $8, %rax                # get next byte
+       dec     n_misaligned
+       jne     .Lalign_loop
+.Laligned:
+
+       ################################################################
+       ## 2) PROCESS BLOCK:
+       ################################################################
+
+       cmp     $128*24, len            # 3 lanes x 128 qwords x 8 bytes
+       jae     .Lfull_block
+
+.Lpartial_block:
+       # Compute floor(len / 24) to get num qwords to process from each lane.
+       imul    $2731, len_dw, %eax     # 2731 = ceil(2^16 / 24)
+       shr     $16, %eax
+       jmp     .Lcrc_3lanes
+
+.Lfull_block:
+       # Processing 128 qwords from each lane.
+       mov     $128, %eax
+
+       ################################################################
+       ## 3) CRC each of three lanes:
+       ################################################################
+
+.Lcrc_3lanes:
+       xor     crc1,crc1               # lanes 1 and 2 start from a zero CRC
+       xor     crc2,crc2
+       mov     %eax, chunk_bytes
+       shl     $3, chunk_bytes         # num bytes to process from each lane
+       sub     $5, %eax                # 4 for 4x_loop, 1 for special last iter
+       jl      .Lcrc_3lanes_4x_done
+
+       # Unroll the loop by a factor of 4 to reduce the overhead of the loop
+       # bookkeeping instructions, which can compete with crc32q for the ALUs.
+.Lcrc_3lanes_4x_loop:
+       crc32q  (bufp), crc0_q
+       crc32q  (bufp,chunk_bytes_q), crc1
+       crc32q  (bufp,chunk_bytes_q,2), crc2
+       crc32q  8(bufp), crc0_q
+       crc32q  8(bufp,chunk_bytes_q), crc1
+       crc32q  8(bufp,chunk_bytes_q,2), crc2
+       crc32q  16(bufp), crc0_q
+       crc32q  16(bufp,chunk_bytes_q), crc1
+       crc32q  16(bufp,chunk_bytes_q,2), crc2
+       crc32q  24(bufp), crc0_q
+       crc32q  24(bufp,chunk_bytes_q), crc1
+       crc32q  24(bufp,chunk_bytes_q,2), crc2
+       add     $32, bufp
+       sub     $4, %eax
+       jge     .Lcrc_3lanes_4x_loop
+
+.Lcrc_3lanes_4x_done:
+       add     $4, %eax                # eax = qwords left before the final one
+       jz      .Lcrc_3lanes_last_qword
+
+.Lcrc_3lanes_1x_loop:
+       crc32q  (bufp), crc0_q
+       crc32q  (bufp,chunk_bytes_q), crc1
+       crc32q  (bufp,chunk_bytes_q,2), crc2
+       add     $8, bufp
+       dec     %eax
+       jnz     .Lcrc_3lanes_1x_loop
+
+.Lcrc_3lanes_last_qword:
+       crc32q  (bufp), crc0_q
+       crc32q  (bufp,chunk_bytes_q), crc1
+# SKIP  crc32q (bufp,chunk_bytes_q,2), crc2    ; Don't do this one yet
+
+       ################################################################
+       ## 4) Combine three results:
+       ################################################################
+
+       lea     (K_table-8)(%rip), %rax         # first entry is for idx 1
+       pmovzxdq (%rax,chunk_bytes_q), %xmm0    # 2 consts: K1:K2
+       lea     (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
+       sub     %rax, len                       # len -= chunk_bytes * 3
+
+       movq    crc0_q, %xmm1                   # CRC for block 1
+       pclmulqdq $0x00, %xmm0, %xmm1           # Multiply by K2
+
+       movq    crc1, %xmm2                     # CRC for block 2
+       pclmulqdq $0x10, %xmm0, %xmm2           # Multiply by K1
+
+       pxor    %xmm2,%xmm1
+       movq    %xmm1, %rax
+       xor     (bufp,chunk_bytes_q,2), %rax    # merge with the lane 2 qword skipped above
+       mov     crc2, crc0_q
+       crc32   %rax, crc0_q                    # crc0 = combined CRC of all three lanes
+       lea     8(bufp,chunk_bytes_q,2), bufp   # step past the end of lane 2
+
+       ################################################################
+       ## 5) If more blocks remain, goto (2):
+       ################################################################
+
+       cmp     $128*24, len
+       jae     .Lfull_block
+       cmp     $SMALL_SIZE, len
+       jae     .Lpartial_block
+
+       #######################################################################
+       ## 6) Process any remainder without interleaving:
+       #######################################################################
+.Lsmall:
+       test    len_dw, len_dw          # len < SMALL_SIZE here, so 32 bits suffice
+       jz      .Ldone
+       mov     len_dw, %eax
+       shr     $3, %eax                # eax = number of whole qwords
+       jz      .Ldo_dword
+.Ldo_qwords:
+       crc32q  (bufp), crc0_q
+       add     $8, bufp
+       dec     %eax
+       jnz     .Ldo_qwords
+.Ldo_dword:
+       test    $4, len_dw              # bits 2..0 of len select the 4/2/1-byte tail steps
+       jz      .Ldo_word
+       crc32l  (bufp), crc0
+       add     $4, bufp
+.Ldo_word:
+       test    $2, len_dw
+       jz      .Ldo_byte
+       crc32w  (bufp), crc0
+       add     $2, bufp
+.Ldo_byte:
+       test    $1, len_dw
+       jz      .Ldone
+       crc32b  (bufp), crc0
+.Ldone:
+       mov     crc0, %eax              # return value
+        RET
+SYM_FUNC_END(crc32c_x86_3way)
+
+.section       .rodata, "a", @progbits
+       ################################################################
+       ## PCLMULQDQ tables
+       ## 128 entries of two 32-bit constants (8 bytes per entry); entry i
+       ## (1 <= i <= 128) is used when each lane is i qwords long
+       ################################################################
+.align 8
+K_table:
+       .long 0x493c7d27, 0x00000001
+       .long 0xba4fc28e, 0x493c7d27
+       .long 0xddc0152b, 0xf20c0dfe
+       .long 0x9e4addf8, 0xba4fc28e
+       .long 0x39d3b296, 0x3da6d0cb
+       .long 0x0715ce53, 0xddc0152b
+       .long 0x47db8317, 0x1c291d04
+       .long 0x0d3b6092, 0x9e4addf8
+       .long 0xc96cfdc0, 0x740eef02
+       .long 0x878a92a7, 0x39d3b296
+       .long 0xdaece73e, 0x083a6eec
+       .long 0xab7aff2a, 0x0715ce53
+       .long 0x2162d385, 0xc49f4f67
+       .long 0x83348832, 0x47db8317
+       .long 0x299847d5, 0x2ad91c30
+       .long 0xb9e02b86, 0x0d3b6092
+       .long 0x18b33a4e, 0x6992cea2
+       .long 0xb6dd949b, 0xc96cfdc0
+       .long 0x78d9ccb7, 0x7e908048
+       .long 0xbac2fd7b, 0x878a92a7
+       .long 0xa60ce07b, 0x1b3d8f29
+       .long 0xce7f39f4, 0xdaece73e
+       .long 0x61d82e56, 0xf1d0f55e
+       .long 0xd270f1a2, 0xab7aff2a
+       .long 0xc619809d, 0xa87ab8a8
+       .long 0x2b3cac5d, 0x2162d385
+       .long 0x65863b64, 0x8462d800
+       .long 0x1b03397f, 0x83348832
+       .long 0xebb883bd, 0x71d111a8
+       .long 0xb3e32c28, 0x299847d5
+       .long 0x064f7f26, 0xffd852c6
+       .long 0xdd7e3b0c, 0xb9e02b86
+       .long 0xf285651c, 0xdcb17aa4
+       .long 0x10746f3c, 0x18b33a4e
+       .long 0xc7a68855, 0xf37c5aee
+       .long 0x271d9844, 0xb6dd949b
+       .long 0x8e766a0c, 0x6051d5a2
+       .long 0x93a5f730, 0x78d9ccb7
+       .long 0x6cb08e5c, 0x18b0d4ff
+       .long 0x6b749fb2, 0xbac2fd7b
+       .long 0x1393e203, 0x21f3d99c
+       .long 0xcec3662e, 0xa60ce07b
+       .long 0x96c515bb, 0x8f158014
+       .long 0xe6fc4e6a, 0xce7f39f4
+       .long 0x8227bb8a, 0xa00457f7
+       .long 0xb0cd4768, 0x61d82e56
+       .long 0x39c7ff35, 0x8d6d2c43
+       .long 0xd7a4825c, 0xd270f1a2
+       .long 0x0ab3844b, 0x00ac29cf
+       .long 0x0167d312, 0xc619809d
+       .long 0xf6076544, 0xe9adf796
+       .long 0x26f6a60a, 0x2b3cac5d
+       .long 0xa741c1bf, 0x96638b34
+       .long 0x98d8d9cb, 0x65863b64
+       .long 0x49c3cc9c, 0xe0e9f351
+       .long 0x68bce87a, 0x1b03397f
+       .long 0x57a3d037, 0x9af01f2d
+       .long 0x6956fc3b, 0xebb883bd
+       .long 0x42d98888, 0x2cff42cf
+       .long 0x3771e98f, 0xb3e32c28
+       .long 0xb42ae3d9, 0x88f25a3a
+       .long 0x2178513a, 0x064f7f26
+       .long 0xe0ac139e, 0x4e36f0b0
+       .long 0x170076fa, 0xdd7e3b0c
+       .long 0x444dd413, 0xbd6f81f8
+       .long 0x6f345e45, 0xf285651c
+       .long 0x41d17b64, 0x91c9bd4b
+       .long 0xff0dba97, 0x10746f3c
+       .long 0xa2b73df1, 0x885f087b
+       .long 0xf872e54c, 0xc7a68855
+       .long 0x1e41e9fc, 0x4c144932
+       .long 0x86d8e4d2, 0x271d9844
+       .long 0x651bd98b, 0x52148f02
+       .long 0x5bb8f1bc, 0x8e766a0c
+       .long 0xa90fd27a, 0xa3c6f37a
+       .long 0xb3af077a, 0x93a5f730
+       .long 0x4984d782, 0xd7c0557f
+       .long 0xca6ef3ac, 0x6cb08e5c
+       .long 0x234e0b26, 0x63ded06a
+       .long 0xdd66cbbb, 0x6b749fb2
+       .long 0x4597456a, 0x4d56973c
+       .long 0xe9e28eb4, 0x1393e203
+       .long 0x7b3ff57a, 0x9669c9df
+       .long 0xc9c8b782, 0xcec3662e
+       .long 0x3f70cc6f, 0xe417f38a
+       .long 0x93e106a4, 0x96c515bb
+       .long 0x62ec6c6d, 0x4b9e0f71
+       .long 0xd813b325, 0xe6fc4e6a
+       .long 0x0df04680, 0xd104b8fc
+       .long 0x2342001e, 0x8227bb8a
+       .long 0x0a2a8d7e, 0x5b397730
+       .long 0x6d9a4957, 0xb0cd4768
+       .long 0xe8b6368b, 0xe78eb416
+       .long 0xd2c3ed1a, 0x39c7ff35
+       .long 0x995a5724, 0x61ff0e01
+       .long 0x9ef68d35, 0xd7a4825c
+       .long 0x0c139b31, 0x8d96551c
+       .long 0xf2271e60, 0x0ab3844b
+       .long 0x0b0bf8ca, 0x0bf80dd2
+       .long 0x2664fd8b, 0x0167d312
+       .long 0xed64812d, 0x8821abed
+       .long 0x02ee03b2, 0xf6076544
+       .long 0x8604ae0f, 0x6a45d2b2
+       .long 0x363bd6b3, 0x26f6a60a
+       .long 0x135c83fd, 0xd8d26619
+       .long 0x5fabe670, 0xa741c1bf
+       .long 0x35ec3279, 0xde87806c
+       .long 0x00bcf5f6, 0x98d8d9cb
+       .long 0x8ae00689, 0x14338754
+       .long 0x17f27698, 0x49c3cc9c
+       .long 0x58ca5f00, 0x5bd2011f
+       .long 0xaa7c7ad5, 0x68bce87a
+       .long 0xb5cfca28, 0xdd07448e
+       .long 0xded288f8, 0x57a3d037
+       .long 0x59f229bc, 0xdde8f5b9
+       .long 0x6d390dec, 0x6956fc3b
+       .long 0x37170390, 0xa3e3e02c
+       .long 0x6353c1cc, 0x42d98888
+       .long 0xc4584f5c, 0xd73c7bea
+       .long 0xf48642e9, 0x3771e98f
+       .long 0x531377e2, 0x80ff0093
+       .long 0xdd35bc8d, 0xb42ae3d9
+       .long 0xb25b29f2, 0x8fe4c34d
+       .long 0x9a5ede41, 0x2178513a
+       .long 0xa563905d, 0xdf99fc11
+       .long 0x45cddf4e, 0xe0ac139e
+       .long 0xacfa3103, 0x6c23e841
+       .long 0xa51b6135, 0x170076fa
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig

index 922b207bc69dca6fe7f6833046b683d151cfbdb5..1c0517a125713a59c9ba1d0c3f1ac424e9db8c3d 100644 (file)
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -4,7 +4,6 @@ config ISCSI_TARGET
         depends on INET
         select CRYPTO
         select CRYPTO_CRC32C
-       select CRYPTO_CRC32C_INTEL if X86
         help
         Say M to enable the SCSI target mode stack. A SCSI target mode stack
         is software that makes local storage available over a storage network
author	Eric Biggers <ebiggers@google.com>
	Mon, 2 Dec 2024 01:08:38 +0000 (17:08 -0800)
committer	Eric Biggers <ebiggers@google.com>
	Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
arch/x86/Kconfig		patch \| blob \| blame \| history
arch/x86/crypto/Kconfig		patch \| blob \| blame \| history
arch/x86/crypto/Makefile		patch \| blob \| blame \| history
arch/x86/crypto/crc32-pclmul_asm.S	[deleted file]	patch \| blob \| blame \| history
arch/x86/crypto/crc32-pclmul_glue.c	[deleted file]	patch \| blob \| blame \| history
arch/x86/crypto/crc32c-intel_glue.c	[deleted file]	patch \| blob \| blame \| history
arch/x86/crypto/crc32c-pcl-intel-asm_64.S	[deleted file]	patch \| blob \| blame \| history
arch/x86/lib/Makefile		patch \| blob \| blame \| history
arch/x86/lib/crc32-glue.c	[new file with mode: 0644]	patch \| blob
arch/x86/lib/crc32-pclmul.S	[new file with mode: 0644]	patch \| blob
arch/x86/lib/crc32c-3way.S	[new file with mode: 0644]	patch \| blob
drivers/target/iscsi/Kconfig		patch \| blob \| blame \| history