x86/crc32: expose CRC32 functions through lib
author: Eric Biggers <ebiggers@google.com>
Mon, 2 Dec 2024 01:08:38 +0000 (17:08 -0800)
committer: Eric Biggers <ebiggers@google.com>
Mon, 2 Dec 2024 01:23:01 +0000 (17:23 -0800)
Move the x86 CRC32 assembly code into the lib directory and wire it up
to the library interface.  This allows it to be used without going
through the crypto API.  It remains usable via the crypto API too,
through the shash algorithms that use the library interface.  Thus all the
arch-specific "shash" code becomes unnecessary and is removed.

Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20241202010844.144356-14-ebiggers@kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
12 files changed:
arch/x86/Kconfig
arch/x86/crypto/Kconfig
arch/x86/crypto/Makefile
arch/x86/crypto/crc32-pclmul_asm.S [deleted file]
arch/x86/crypto/crc32-pclmul_glue.c [deleted file]
arch/x86/crypto/crc32c-intel_glue.c [deleted file]
arch/x86/crypto/crc32c-pcl-intel-asm_64.S [deleted file]
arch/x86/lib/Makefile
arch/x86/lib/crc32-glue.c [new file with mode: 0644]
arch/x86/lib/crc32-pclmul.S [new file with mode: 0644]
arch/x86/lib/crc32c-3way.S [new file with mode: 0644]
drivers/target/iscsi/Kconfig

index 9d7bd0ae48c4260f4abb6dbedc696e3915c230ea..4f4fc67fbce7a69ac630bc553eb99a9e6d0fd5d5 100644 (file)
@@ -76,6 +76,7 @@ config X86
        select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION
        select ARCH_HAS_CPU_FINALIZE_INIT
        select ARCH_HAS_CPU_PASID               if IOMMU_SVA
+       select ARCH_HAS_CRC32
        select ARCH_HAS_CURRENT_STACK_POINTER
        select ARCH_HAS_DEBUG_VIRTUAL
        select ARCH_HAS_DEBUG_VM_PGTABLE        if !X86_PAE
index 3d2e38ba524033940ff1124bd644827bc4fba21d..ba9a7e73cd0847a1c00e9724a6e7401aa8c5f6ba 100644 (file)
@@ -492,28 +492,6 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL
          Architecture: x86_64 using:
          - CLMUL-NI (carry-less multiplication new instructions)
 
-config CRYPTO_CRC32C_INTEL
-       tristate "CRC32c (SSE4.2/PCLMULQDQ)"
-       depends on X86
-       select CRYPTO_HASH
-       help
-         CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720)
-
-         Architecture: x86 (32-bit and 64-bit) using:
-         - SSE4.2 (Streaming SIMD Extensions 4.2) CRC32 instruction
-         - PCLMULQDQ (carry-less multiplication)
-
-config CRYPTO_CRC32_PCLMUL
-       tristate "CRC32 (PCLMULQDQ)"
-       depends on X86
-       select CRYPTO_HASH
-       select CRC32
-       help
-         CRC32 CRC algorithm (IEEE 802.3)
-
-         Architecture: x86 (32-bit and 64-bit) using:
-         - PCLMULQDQ (carry-less multiplication)
-
 config CRYPTO_CRCT10DIF_PCLMUL
        tristate "CRCT10DIF (PCLMULQDQ)"
        depends on X86 && 64BIT && CRC_T10DIF
index 53b4a277809e0804777ea0c9e60456e899f95783..030b925ca4e28ab65311d6f0c586faabc0557c88 100644 (file)
@@ -75,13 +75,6 @@ ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o
 obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) += polyval-clmulni.o
 polyval-clmulni-y := polyval-clmulni_asm.o polyval-clmulni_glue.o
 
-obj-$(CONFIG_CRYPTO_CRC32C_INTEL) += crc32c-intel.o
-crc32c-intel-y := crc32c-intel_glue.o
-crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
-
-obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
-crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
-
 obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
 crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o
 
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
deleted file mode 100644 (file)
index f963778..0000000
+++ /dev/null
@@ -1,217 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
- * calculation.
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
- * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
- * at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2B: Instruction Set Reference, N-Z
- *
- * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
- *           Alexander Boyko <Alexander_Boyko@xyratex.com>
- */
-
-#include <linux/linkage.h>
-
-
-.section .rodata
-.align 16
-/*
- * [x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
- * #define CONSTANT_R1  0x154442bd4LL
- *
- * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
- * #define CONSTANT_R2  0x1c6e41596LL
- */
-.Lconstant_R2R1:
-       .octa 0x00000001c6e415960000000154442bd4
-/*
- * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
- * #define CONSTANT_R3  0x1751997d0LL
- *
- * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
- * #define CONSTANT_R4  0x0ccaa009eLL
- */
-.Lconstant_R4R3:
-       .octa 0x00000000ccaa009e00000001751997d0
-/*
- * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
- * #define CONSTANT_R5  0x163cd6124LL
- */
-.Lconstant_R5:
-       .octa 0x00000000000000000000000163cd6124
-.Lconstant_mask32:
-       .octa 0x000000000000000000000000FFFFFFFF
-/*
- * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
- *
- * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
- * #define CONSTANT_RU  0x1F7011641LL
- */
-.Lconstant_RUpoly:
-       .octa 0x00000001F701164100000001DB710641
-
-#define CONSTANT %xmm0
-
-#ifdef __x86_64__
-#define CRC     %edi
-#define BUF     %rsi
-#define LEN     %rdx
-#else
-#define CRC     %eax
-#define BUF     %edx
-#define LEN     %ecx
-#endif
-
-
-
-.text
-/**
- *      Calculate crc32
- *      CRC - initial crc32
- *      BUF - buffer (16 bytes aligned)
- *      LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
- *      return %eax crc32
- *      u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
- */
-
-SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
-       movdqa  (BUF), %xmm1
-       movdqa  0x10(BUF), %xmm2
-       movdqa  0x20(BUF), %xmm3
-       movdqa  0x30(BUF), %xmm4
-       movd    CRC, CONSTANT
-       pxor    CONSTANT, %xmm1
-       sub     $0x40, LEN
-       add     $0x40, BUF
-       cmp     $0x40, LEN
-       jb      .Lless_64
-
-#ifdef __x86_64__
-       movdqa .Lconstant_R2R1(%rip), CONSTANT
-#else
-       movdqa .Lconstant_R2R1, CONSTANT
-#endif
-
-.Lloop_64:/*  64 bytes Full cache line folding */
-       prefetchnta    0x40(BUF)
-       movdqa  %xmm1, %xmm5
-       movdqa  %xmm2, %xmm6
-       movdqa  %xmm3, %xmm7
-#ifdef __x86_64__
-       movdqa  %xmm4, %xmm8
-#endif
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x00, CONSTANT, %xmm2
-       pclmulqdq $0x00, CONSTANT, %xmm3
-#ifdef __x86_64__
-       pclmulqdq $0x00, CONSTANT, %xmm4
-#endif
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pclmulqdq $0x11, CONSTANT, %xmm6
-       pclmulqdq $0x11, CONSTANT, %xmm7
-#ifdef __x86_64__
-       pclmulqdq $0x11, CONSTANT, %xmm8
-#endif
-       pxor    %xmm5, %xmm1
-       pxor    %xmm6, %xmm2
-       pxor    %xmm7, %xmm3
-#ifdef __x86_64__
-       pxor    %xmm8, %xmm4
-#else
-       /* xmm8 unsupported for x32 */
-       movdqa  %xmm4, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm4
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm4
-#endif
-
-       pxor    (BUF), %xmm1
-       pxor    0x10(BUF), %xmm2
-       pxor    0x20(BUF), %xmm3
-       pxor    0x30(BUF), %xmm4
-
-       sub     $0x40, LEN
-       add     $0x40, BUF
-       cmp     $0x40, LEN
-       jge     .Lloop_64
-.Lless_64:/*  Folding cache line into 128bit */
-#ifdef __x86_64__
-       movdqa  .Lconstant_R4R3(%rip), CONSTANT
-#else
-       movdqa  .Lconstant_R4R3, CONSTANT
-#endif
-       prefetchnta     (BUF)
-
-       movdqa  %xmm1, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm1
-       pxor    %xmm2, %xmm1
-
-       movdqa  %xmm1, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm1
-       pxor    %xmm3, %xmm1
-
-       movdqa  %xmm1, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm1
-       pxor    %xmm4, %xmm1
-
-       cmp     $0x10, LEN
-       jb      .Lfold_64
-.Lloop_16:/* Folding rest buffer into 128bit */
-       movdqa  %xmm1, %xmm5
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pclmulqdq $0x11, CONSTANT, %xmm5
-       pxor    %xmm5, %xmm1
-       pxor    (BUF), %xmm1
-       sub     $0x10, LEN
-       add     $0x10, BUF
-       cmp     $0x10, LEN
-       jge     .Lloop_16
-
-.Lfold_64:
-       /* perform the last 64 bit fold, also adds 32 zeroes
-        * to the input stream */
-       pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
-       psrldq  $0x08, %xmm1
-       pxor    CONSTANT, %xmm1
-
-       /* final 32-bit fold */
-       movdqa  %xmm1, %xmm2
-#ifdef __x86_64__
-       movdqa  .Lconstant_R5(%rip), CONSTANT
-       movdqa  .Lconstant_mask32(%rip), %xmm3
-#else
-       movdqa  .Lconstant_R5, CONSTANT
-       movdqa  .Lconstant_mask32, %xmm3
-#endif
-       psrldq  $0x04, %xmm2
-       pand    %xmm3, %xmm1
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pxor    %xmm2, %xmm1
-
-       /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
-#ifdef __x86_64__
-       movdqa  .Lconstant_RUpoly(%rip), CONSTANT
-#else
-       movdqa  .Lconstant_RUpoly, CONSTANT
-#endif
-       movdqa  %xmm1, %xmm2
-       pand    %xmm3, %xmm1
-       pclmulqdq $0x10, CONSTANT, %xmm1
-       pand    %xmm3, %xmm1
-       pclmulqdq $0x00, CONSTANT, %xmm1
-       pxor    %xmm2, %xmm1
-       pextrd  $0x01, %xmm1, %eax
-
-       RET
-SYM_FUNC_END(crc32_pclmul_le_16)
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
deleted file mode 100644 (file)
index 9d14eac..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
-/* GPL HEADER START
- *
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 only,
- * as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License version 2 for more details (a copy is included
- * in the LICENSE file that accompanied this code).
- *
- * You should have received a copy of the GNU General Public License
- * version 2 along with this program; If not, see http://www.gnu.org/licenses
- *
- * Please  visit http://www.xyratex.com/contact if you need additional
- * information or have any questions.
- *
- * GPL HEADER END
- */
-
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation.
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <linux/crc32.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-
-#include <asm/cpufeatures.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
-
-#define CHKSUM_BLOCK_SIZE      1
-#define CHKSUM_DIGEST_SIZE     4
-
-#define PCLMUL_MIN_LEN         64L     /* minimum size of buffer
-                                        * for crc32_pclmul_le_16 */
-#define SCALE_F                        16L     /* size of xmm register */
-#define SCALE_F_MASK           (SCALE_F - 1)
-
-u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
-
-static u32 __attribute__((pure))
-       crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len)
-{
-       unsigned int iquotient;
-       unsigned int iremainder;
-       unsigned int prealign;
-
-       if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable())
-               return crc32_le(crc, p, len);
-
-       if ((long)p & SCALE_F_MASK) {
-               /* align p to 16 byte */
-               prealign = SCALE_F - ((long)p & SCALE_F_MASK);
-
-               crc = crc32_le(crc, p, prealign);
-               len -= prealign;
-               p = (unsigned char *)(((unsigned long)p + SCALE_F_MASK) &
-                                    ~SCALE_F_MASK);
-       }
-       iquotient = len & (~SCALE_F_MASK);
-       iremainder = len & SCALE_F_MASK;
-
-       kernel_fpu_begin();
-       crc = crc32_pclmul_le_16(crc, p, iquotient);
-       kernel_fpu_end();
-
-       if (iremainder)
-               crc = crc32_le(crc, p + iquotient, iremainder);
-
-       return crc;
-}
-
-static int crc32_pclmul_cra_init(struct crypto_tfm *tfm)
-{
-       u32 *key = crypto_tfm_ctx(tfm);
-
-       *key = 0;
-
-       return 0;
-}
-
-static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key,
-                       unsigned int keylen)
-{
-       u32 *mctx = crypto_shash_ctx(hash);
-
-       if (keylen != sizeof(u32))
-               return -EINVAL;
-       *mctx = le32_to_cpup((__le32 *)key);
-       return 0;
-}
-
-static int crc32_pclmul_init(struct shash_desc *desc)
-{
-       u32 *mctx = crypto_shash_ctx(desc->tfm);
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *crcp = *mctx;
-
-       return 0;
-}
-
-static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data,
-                              unsigned int len)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *crcp = crc32_pclmul_le(*crcp, data, len);
-       return 0;
-}
-
-/* No final XOR 0xFFFFFFFF, like crc32_le */
-static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int len,
-                               u8 *out)
-{
-       *(__le32 *)out = cpu_to_le32(crc32_pclmul_le(*crcp, data, len));
-       return 0;
-}
-
-static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data,
-                             unsigned int len, u8 *out)
-{
-       return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32_pclmul_final(struct shash_desc *desc, u8 *out)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *(__le32 *)out = cpu_to_le32p(crcp);
-       return 0;
-}
-
-static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data,
-                              unsigned int len, u8 *out)
-{
-       return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len,
-                                   out);
-}
-
-static struct shash_alg alg = {
-       .setkey         = crc32_pclmul_setkey,
-       .init           = crc32_pclmul_init,
-       .update         = crc32_pclmul_update,
-       .final          = crc32_pclmul_final,
-       .finup          = crc32_pclmul_finup,
-       .digest         = crc32_pclmul_digest,
-       .descsize       = sizeof(u32),
-       .digestsize     = CHKSUM_DIGEST_SIZE,
-       .base           = {
-                       .cra_name               = "crc32",
-                       .cra_driver_name        = "crc32-pclmul",
-                       .cra_priority           = 200,
-                       .cra_flags              = CRYPTO_ALG_OPTIONAL_KEY,
-                       .cra_blocksize          = CHKSUM_BLOCK_SIZE,
-                       .cra_ctxsize            = sizeof(u32),
-                       .cra_module             = THIS_MODULE,
-                       .cra_init               = crc32_pclmul_cra_init,
-       }
-};
-
-static const struct x86_cpu_id crc32pclmul_cpu_id[] = {
-       X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL),
-       {}
-};
-MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id);
-
-
-static int __init crc32_pclmul_mod_init(void)
-{
-
-       if (!x86_match_cpu(crc32pclmul_cpu_id)) {
-               pr_info("PCLMULQDQ-NI instructions are not detected.\n");
-               return -ENODEV;
-       }
-       return crypto_register_shash(&alg);
-}
-
-static void __exit crc32_pclmul_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_init(crc32_pclmul_mod_init);
-module_exit(crc32_pclmul_mod_fini);
-
-MODULE_AUTHOR("Alexander Boyko <alexander_boyko@xyratex.com>");
-MODULE_DESCRIPTION("CRC32 algorithm (IEEE 802.3) accelerated with PCLMULQDQ");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32");
-MODULE_ALIAS_CRYPTO("crc32-pclmul");
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
deleted file mode 100644 (file)
index 603d159..0000000
+++ /dev/null
@@ -1,249 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Using hardware provided CRC32 instruction to accelerate the CRC32 disposal.
- * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE)
- * CRC32 is a new instruction in Intel SSE4.2, the reference can be found at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2A: Instruction Set Reference, A-M
- *
- * Copyright (C) 2008 Intel Corporation
- * Authors: Austin Zhang <austin_zhang@linux.intel.com>
- *          Kent Liu <kent.liu@intel.com>
- */
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-#include <crypto/internal/hash.h>
-#include <crypto/internal/simd.h>
-
-#include <asm/cpufeatures.h>
-#include <asm/cpu_device_id.h>
-#include <asm/simd.h>
-
-#define CHKSUM_BLOCK_SIZE      1
-#define CHKSUM_DIGEST_SIZE     4
-
-#define SCALE_F        sizeof(unsigned long)
-
-#ifdef CONFIG_X86_64
-#define CRC32_INST "crc32q %1, %q0"
-#else
-#define CRC32_INST "crc32l %1, %0"
-#endif
-
-#ifdef CONFIG_X86_64
-/*
- * use carryless multiply version of crc32c when buffer
- * size is >= 512 to account
- * for fpu state save/restore overhead.
- */
-#define CRC32C_PCL_BREAKEVEN   512
-
-asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
-#endif /* CONFIG_X86_64 */
-
-static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
-{
-       while (length--) {
-               asm("crc32b %1, %0"
-                   : "+r" (crc) : "rm" (*data));
-               data++;
-       }
-
-       return crc;
-}
-
-static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len)
-{
-       unsigned int iquotient = len / SCALE_F;
-       unsigned int iremainder = len % SCALE_F;
-       unsigned long *ptmp = (unsigned long *)p;
-
-       while (iquotient--) {
-               asm(CRC32_INST
-                   : "+r" (crc) : "rm" (*ptmp));
-               ptmp++;
-       }
-
-       if (iremainder)
-               crc = crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp,
-                                iremainder);
-
-       return crc;
-}
-
-/*
- * Setting the seed allows arbitrary accumulators and flexible XOR policy
- * If your algorithm starts with ~0, then XOR with ~0 before you set
- * the seed.
- */
-static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key,
-                       unsigned int keylen)
-{
-       u32 *mctx = crypto_shash_ctx(hash);
-
-       if (keylen != sizeof(u32))
-               return -EINVAL;
-       *mctx = le32_to_cpup((__le32 *)key);
-       return 0;
-}
-
-static int crc32c_intel_init(struct shash_desc *desc)
-{
-       u32 *mctx = crypto_shash_ctx(desc->tfm);
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *crcp = *mctx;
-
-       return 0;
-}
-
-static int crc32c_intel_update(struct shash_desc *desc, const u8 *data,
-                              unsigned int len)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *crcp = crc32c_intel_le_hw(*crcp, data, len);
-       return 0;
-}
-
-static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
-                               u8 *out)
-{
-       *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
-       return 0;
-}
-
-static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data,
-                             unsigned int len, u8 *out)
-{
-       return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32c_intel_final(struct shash_desc *desc, u8 *out)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       *(__le32 *)out = ~cpu_to_le32p(crcp);
-       return 0;
-}
-
-static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data,
-                              unsigned int len, u8 *out)
-{
-       return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
-                                   out);
-}
-
-static int crc32c_intel_cra_init(struct crypto_tfm *tfm)
-{
-       u32 *key = crypto_tfm_ctx(tfm);
-
-       *key = ~0;
-
-       return 0;
-}
-
-#ifdef CONFIG_X86_64
-static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
-                              unsigned int len)
-{
-       u32 *crcp = shash_desc_ctx(desc);
-
-       /*
-        * use faster PCL version if datasize is large enough to
-        * overcome kernel fpu state save/restore overhead
-        */
-       if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
-               kernel_fpu_begin();
-               *crcp = crc32c_x86_3way(*crcp, data, len);
-               kernel_fpu_end();
-       } else
-               *crcp = crc32c_intel_le_hw(*crcp, data, len);
-       return 0;
-}
-
-static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
-                               u8 *out)
-{
-       if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
-               kernel_fpu_begin();
-               *(__le32 *)out = ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len));
-               kernel_fpu_end();
-       } else
-               *(__le32 *)out =
-                       ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len));
-       return 0;
-}
-
-static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data,
-                             unsigned int len, u8 *out)
-{
-       return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out);
-}
-
-static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data,
-                              unsigned int len, u8 *out)
-{
-       return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len,
-                                   out);
-}
-#endif /* CONFIG_X86_64 */
-
-static struct shash_alg alg = {
-       .setkey                 =       crc32c_intel_setkey,
-       .init                   =       crc32c_intel_init,
-       .update                 =       crc32c_intel_update,
-       .final                  =       crc32c_intel_final,
-       .finup                  =       crc32c_intel_finup,
-       .digest                 =       crc32c_intel_digest,
-       .descsize               =       sizeof(u32),
-       .digestsize             =       CHKSUM_DIGEST_SIZE,
-       .base                   =       {
-               .cra_name               =       "crc32c",
-               .cra_driver_name        =       "crc32c-intel",
-               .cra_priority           =       200,
-               .cra_flags              =       CRYPTO_ALG_OPTIONAL_KEY,
-               .cra_blocksize          =       CHKSUM_BLOCK_SIZE,
-               .cra_ctxsize            =       sizeof(u32),
-               .cra_module             =       THIS_MODULE,
-               .cra_init               =       crc32c_intel_cra_init,
-       }
-};
-
-static const struct x86_cpu_id crc32c_cpu_id[] = {
-       X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL),
-       {}
-};
-MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id);
-
-static int __init crc32c_intel_mod_init(void)
-{
-       if (!x86_match_cpu(crc32c_cpu_id))
-               return -ENODEV;
-#ifdef CONFIG_X86_64
-       if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
-               alg.update = crc32c_pcl_intel_update;
-               alg.finup = crc32c_pcl_intel_finup;
-               alg.digest = crc32c_pcl_intel_digest;
-       }
-#endif
-       return crypto_register_shash(&alg);
-}
-
-static void __exit crc32c_intel_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_init(crc32c_intel_mod_init);
-module_exit(crc32c_intel_mod_fini);
-
-MODULE_AUTHOR("Austin Zhang <austin.zhang@intel.com>, Kent Liu <kent.liu@intel.com>");
-MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.");
-MODULE_LICENSE("GPL");
-
-MODULE_ALIAS_CRYPTO("crc32c");
-MODULE_ALIAS_CRYPTO("crc32c-intel");
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
deleted file mode 100644 (file)
index 9b87705..0000000
+++ /dev/null
@@ -1,360 +0,0 @@
-/*
- * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
- *
- * The white papers on CRC32C calculations with PCLMULQDQ instruction can be
- * downloaded from:
- * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
- * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
- *
- * Copyright (C) 2012 Intel Corporation.
- * Copyright 2024 Google LLC
- *
- * Authors:
- *     Wajdi Feghali <wajdi.k.feghali@intel.com>
- *     James Guilford <james.guilford@intel.com>
- *     David Cote <david.m.cote@intel.com>
- *     Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/linkage.h>
-
-## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
-
-# Define threshold below which buffers are considered "small" and routed to
-# regular CRC code that does not interleave the CRC instructions.
-#define SMALL_SIZE 200
-
-# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
-
-.text
-SYM_FUNC_START(crc32c_x86_3way)
-#define    crc0                  %edi
-#define    crc0_q        %rdi
-#define    bufp                  %rsi
-#define    bufp_d        %esi
-#define    len           %rdx
-#define    len_dw        %edx
-#define    n_misaligned          %ecx /* overlaps chunk_bytes! */
-#define    n_misaligned_q %rcx
-#define    chunk_bytes   %ecx /* overlaps n_misaligned! */
-#define    chunk_bytes_q  %rcx
-#define    crc1                  %r8
-#define    crc2                  %r9
-
-       cmp     $SMALL_SIZE, len
-       jb      .Lsmall
-
-       ################################################################
-       ## 1) ALIGN:
-       ################################################################
-       mov     bufp_d, n_misaligned
-       neg     n_misaligned
-       and     $7, n_misaligned        # calculate the misalignment amount of
-                                       # the address
-       je      .Laligned               # Skip if aligned
-
-       # Process 1 <= n_misaligned <= 7 bytes individually in order to align
-       # the remaining data to an 8-byte boundary.
-.Ldo_align:
-       movq    (bufp), %rax
-       add     n_misaligned_q, bufp
-       sub     n_misaligned_q, len
-.Lalign_loop:
-       crc32b  %al, crc0               # compute crc32 of 1-byte
-       shr     $8, %rax                # get next byte
-       dec     n_misaligned
-       jne     .Lalign_loop
-.Laligned:
-
-       ################################################################
-       ## 2) PROCESS BLOCK:
-       ################################################################
-
-       cmp     $128*24, len
-       jae     .Lfull_block
-
-.Lpartial_block:
-       # Compute floor(len / 24) to get num qwords to process from each lane.
-       imul    $2731, len_dw, %eax     # 2731 = ceil(2^16 / 24)
-       shr     $16, %eax
-       jmp     .Lcrc_3lanes
-
-.Lfull_block:
-       # Processing 128 qwords from each lane.
-       mov     $128, %eax
-
-       ################################################################
-       ## 3) CRC each of three lanes:
-       ################################################################
-
-.Lcrc_3lanes:
-       xor     crc1,crc1
-       xor     crc2,crc2
-       mov     %eax, chunk_bytes
-       shl     $3, chunk_bytes         # num bytes to process from each lane
-       sub     $5, %eax                # 4 for 4x_loop, 1 for special last iter
-       jl      .Lcrc_3lanes_4x_done
-
-       # Unroll the loop by a factor of 4 to reduce the overhead of the loop
-       # bookkeeping instructions, which can compete with crc32q for the ALUs.
-.Lcrc_3lanes_4x_loop:
-       crc32q  (bufp), crc0_q
-       crc32q  (bufp,chunk_bytes_q), crc1
-       crc32q  (bufp,chunk_bytes_q,2), crc2
-       crc32q  8(bufp), crc0_q
-       crc32q  8(bufp,chunk_bytes_q), crc1
-       crc32q  8(bufp,chunk_bytes_q,2), crc2
-       crc32q  16(bufp), crc0_q
-       crc32q  16(bufp,chunk_bytes_q), crc1
-       crc32q  16(bufp,chunk_bytes_q,2), crc2
-       crc32q  24(bufp), crc0_q
-       crc32q  24(bufp,chunk_bytes_q), crc1
-       crc32q  24(bufp,chunk_bytes_q,2), crc2
-       add     $32, bufp
-       sub     $4, %eax
-       jge     .Lcrc_3lanes_4x_loop
-
-.Lcrc_3lanes_4x_done:
-       add     $4, %eax
-       jz      .Lcrc_3lanes_last_qword
-
-.Lcrc_3lanes_1x_loop:
-       crc32q  (bufp), crc0_q
-       crc32q  (bufp,chunk_bytes_q), crc1
-       crc32q  (bufp,chunk_bytes_q,2), crc2
-       add     $8, bufp
-       dec     %eax
-       jnz     .Lcrc_3lanes_1x_loop
-
-.Lcrc_3lanes_last_qword:
-       crc32q  (bufp), crc0_q
-       crc32q  (bufp,chunk_bytes_q), crc1
-# SKIP  crc32q (bufp,chunk_bytes_q,2), crc2    ; Don't do this one yet
-
-       ################################################################
-       ## 4) Combine three results:
-       ################################################################
-
-       lea     (K_table-8)(%rip), %rax         # first entry is for idx 1
-       pmovzxdq (%rax,chunk_bytes_q), %xmm0    # 2 consts: K1:K2
-       lea     (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
-       sub     %rax, len                       # len -= chunk_bytes * 3
-
-       movq    crc0_q, %xmm1                   # CRC for block 1
-       pclmulqdq $0x00, %xmm0, %xmm1           # Multiply by K2
-
-       movq    crc1, %xmm2                     # CRC for block 2
-       pclmulqdq $0x10, %xmm0, %xmm2           # Multiply by K1
-
-       pxor    %xmm2,%xmm1
-       movq    %xmm1, %rax
-       xor     (bufp,chunk_bytes_q,2), %rax
-       mov     crc2, crc0_q
-       crc32   %rax, crc0_q
-       lea     8(bufp,chunk_bytes_q,2), bufp
-
-       ################################################################
-       ## 5) If more blocks remain, goto (2):
-       ################################################################
-
-       cmp     $128*24, len
-       jae     .Lfull_block
-       cmp     $SMALL_SIZE, len
-       jae     .Lpartial_block
-
-       #######################################################################
-       ## 6) Process any remainder without interleaving:
-       #######################################################################
-.Lsmall:
-       test    len_dw, len_dw
-       jz      .Ldone
-       mov     len_dw, %eax
-       shr     $3, %eax
-       jz      .Ldo_dword
-.Ldo_qwords:
-       crc32q  (bufp), crc0_q
-       add     $8, bufp
-       dec     %eax
-       jnz     .Ldo_qwords
-.Ldo_dword:
-       test    $4, len_dw
-       jz      .Ldo_word
-       crc32l  (bufp), crc0
-       add     $4, bufp
-.Ldo_word:
-       test    $2, len_dw
-       jz      .Ldo_byte
-       crc32w  (bufp), crc0
-       add     $2, bufp
-.Ldo_byte:
-       test    $1, len_dw
-       jz      .Ldone
-       crc32b  (bufp), crc0
-.Ldone:
-       mov     crc0, %eax
-        RET
-SYM_FUNC_END(crc32c_x86_3way)
-
-.section       .rodata, "a", @progbits
-       ################################################################
-       ## PCLMULQDQ tables
-       ## Table is 128 entries x 2 words (8 bytes) each
-       ################################################################
-.align 8
-K_table:
-       .long 0x493c7d27, 0x00000001
-       .long 0xba4fc28e, 0x493c7d27
-       .long 0xddc0152b, 0xf20c0dfe
-       .long 0x9e4addf8, 0xba4fc28e
-       .long 0x39d3b296, 0x3da6d0cb
-       .long 0x0715ce53, 0xddc0152b
-       .long 0x47db8317, 0x1c291d04
-       .long 0x0d3b6092, 0x9e4addf8
-       .long 0xc96cfdc0, 0x740eef02
-       .long 0x878a92a7, 0x39d3b296
-       .long 0xdaece73e, 0x083a6eec
-       .long 0xab7aff2a, 0x0715ce53
-       .long 0x2162d385, 0xc49f4f67
-       .long 0x83348832, 0x47db8317
-       .long 0x299847d5, 0x2ad91c30
-       .long 0xb9e02b86, 0x0d3b6092
-       .long 0x18b33a4e, 0x6992cea2
-       .long 0xb6dd949b, 0xc96cfdc0
-       .long 0x78d9ccb7, 0x7e908048
-       .long 0xbac2fd7b, 0x878a92a7
-       .long 0xa60ce07b, 0x1b3d8f29
-       .long 0xce7f39f4, 0xdaece73e
-       .long 0x61d82e56, 0xf1d0f55e
-       .long 0xd270f1a2, 0xab7aff2a
-       .long 0xc619809d, 0xa87ab8a8
-       .long 0x2b3cac5d, 0x2162d385
-       .long 0x65863b64, 0x8462d800
-       .long 0x1b03397f, 0x83348832
-       .long 0xebb883bd, 0x71d111a8
-       .long 0xb3e32c28, 0x299847d5
-       .long 0x064f7f26, 0xffd852c6
-       .long 0xdd7e3b0c, 0xb9e02b86
-       .long 0xf285651c, 0xdcb17aa4
-       .long 0x10746f3c, 0x18b33a4e
-       .long 0xc7a68855, 0xf37c5aee
-       .long 0x271d9844, 0xb6dd949b
-       .long 0x8e766a0c, 0x6051d5a2
-       .long 0x93a5f730, 0x78d9ccb7
-       .long 0x6cb08e5c, 0x18b0d4ff
-       .long 0x6b749fb2, 0xbac2fd7b
-       .long 0x1393e203, 0x21f3d99c
-       .long 0xcec3662e, 0xa60ce07b
-       .long 0x96c515bb, 0x8f158014
-       .long 0xe6fc4e6a, 0xce7f39f4
-       .long 0x8227bb8a, 0xa00457f7
-       .long 0xb0cd4768, 0x61d82e56
-       .long 0x39c7ff35, 0x8d6d2c43
-       .long 0xd7a4825c, 0xd270f1a2
-       .long 0x0ab3844b, 0x00ac29cf
-       .long 0x0167d312, 0xc619809d
-       .long 0xf6076544, 0xe9adf796
-       .long 0x26f6a60a, 0x2b3cac5d
-       .long 0xa741c1bf, 0x96638b34
-       .long 0x98d8d9cb, 0x65863b64
-       .long 0x49c3cc9c, 0xe0e9f351
-       .long 0x68bce87a, 0x1b03397f
-       .long 0x57a3d037, 0x9af01f2d
-       .long 0x6956fc3b, 0xebb883bd
-       .long 0x42d98888, 0x2cff42cf
-       .long 0x3771e98f, 0xb3e32c28
-       .long 0xb42ae3d9, 0x88f25a3a
-       .long 0x2178513a, 0x064f7f26
-       .long 0xe0ac139e, 0x4e36f0b0
-       .long 0x170076fa, 0xdd7e3b0c
-       .long 0x444dd413, 0xbd6f81f8
-       .long 0x6f345e45, 0xf285651c
-       .long 0x41d17b64, 0x91c9bd4b
-       .long 0xff0dba97, 0x10746f3c
-       .long 0xa2b73df1, 0x885f087b
-       .long 0xf872e54c, 0xc7a68855
-       .long 0x1e41e9fc, 0x4c144932
-       .long 0x86d8e4d2, 0x271d9844
-       .long 0x651bd98b, 0x52148f02
-       .long 0x5bb8f1bc, 0x8e766a0c
-       .long 0xa90fd27a, 0xa3c6f37a
-       .long 0xb3af077a, 0x93a5f730
-       .long 0x4984d782, 0xd7c0557f
-       .long 0xca6ef3ac, 0x6cb08e5c
-       .long 0x234e0b26, 0x63ded06a
-       .long 0xdd66cbbb, 0x6b749fb2
-       .long 0x4597456a, 0x4d56973c
-       .long 0xe9e28eb4, 0x1393e203
-       .long 0x7b3ff57a, 0x9669c9df
-       .long 0xc9c8b782, 0xcec3662e
-       .long 0x3f70cc6f, 0xe417f38a
-       .long 0x93e106a4, 0x96c515bb
-       .long 0x62ec6c6d, 0x4b9e0f71
-       .long 0xd813b325, 0xe6fc4e6a
-       .long 0x0df04680, 0xd104b8fc
-       .long 0x2342001e, 0x8227bb8a
-       .long 0x0a2a8d7e, 0x5b397730
-       .long 0x6d9a4957, 0xb0cd4768
-       .long 0xe8b6368b, 0xe78eb416
-       .long 0xd2c3ed1a, 0x39c7ff35
-       .long 0x995a5724, 0x61ff0e01
-       .long 0x9ef68d35, 0xd7a4825c
-       .long 0x0c139b31, 0x8d96551c
-       .long 0xf2271e60, 0x0ab3844b
-       .long 0x0b0bf8ca, 0x0bf80dd2
-       .long 0x2664fd8b, 0x0167d312
-       .long 0xed64812d, 0x8821abed
-       .long 0x02ee03b2, 0xf6076544
-       .long 0x8604ae0f, 0x6a45d2b2
-       .long 0x363bd6b3, 0x26f6a60a
-       .long 0x135c83fd, 0xd8d26619
-       .long 0x5fabe670, 0xa741c1bf
-       .long 0x35ec3279, 0xde87806c
-       .long 0x00bcf5f6, 0x98d8d9cb
-       .long 0x8ae00689, 0x14338754
-       .long 0x17f27698, 0x49c3cc9c
-       .long 0x58ca5f00, 0x5bd2011f
-       .long 0xaa7c7ad5, 0x68bce87a
-       .long 0xb5cfca28, 0xdd07448e
-       .long 0xded288f8, 0x57a3d037
-       .long 0x59f229bc, 0xdde8f5b9
-       .long 0x6d390dec, 0x6956fc3b
-       .long 0x37170390, 0xa3e3e02c
-       .long 0x6353c1cc, 0x42d98888
-       .long 0xc4584f5c, 0xd73c7bea
-       .long 0xf48642e9, 0x3771e98f
-       .long 0x531377e2, 0x80ff0093
-       .long 0xdd35bc8d, 0xb42ae3d9
-       .long 0xb25b29f2, 0x8fe4c34d
-       .long 0x9a5ede41, 0x2178513a
-       .long 0xa563905d, 0xdf99fc11
-       .long 0x45cddf4e, 0xe0ac139e
-       .long 0xacfa3103, 0x6c23e841
-       .long 0xa51b6135, 0x170076fa
index 98583a9dbab337e09a2e58905e5200499a496a07..17510da06c9f94aaacffcc7a046d95171ed24759 100644 (file)
@@ -38,6 +38,10 @@ lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
 lib-$(CONFIG_MITIGATION_RETPOLINE) += retpoline.o
 
+obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o
+crc32-x86-y := crc32-glue.o crc32-pclmul.o
+crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o
+
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 obj-y += iomem.o
 
diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c
new file mode 100644 (file)
index 0000000..2dd18a8
--- /dev/null
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * x86-optimized CRC32 functions
+ *
+ * Copyright (C) 2008 Intel Corporation
+ * Copyright 2012 Xyratex Technology Limited
+ * Copyright 2024 Google LLC
+ */
+
+#include <asm/cpufeatures.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <linux/crc32.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+/* minimum size of buffer for crc32_pclmul_le_16 */
+#define CRC32_PCLMUL_MIN_LEN   64
+
+static DEFINE_STATIC_KEY_FALSE(have_crc32);
+static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
+
+u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
+
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
+{
+       if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
+           static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
+               size_t n = -(uintptr_t)p & 15;
+
+               /* align p to 16-byte boundary */
+               if (n) {
+                       crc = crc32_le_base(crc, p, n);
+                       p += n;
+                       len -= n;
+               }
+               n = round_down(len, 16);
+               kernel_fpu_begin();
+               crc = crc32_pclmul_le_16(crc, p, n);
+               kernel_fpu_end();
+               p += n;
+               len -= n;
+       }
+       if (len)
+               crc = crc32_le_base(crc, p, len);
+       return crc;
+}
+EXPORT_SYMBOL(crc32_le_arch);
+
+#ifdef CONFIG_X86_64
+#define CRC32_INST "crc32q %1, %q0"
+#else
+#define CRC32_INST "crc32l %1, %0"
+#endif
+
+/*
+ * Use carryless multiply version of crc32c when buffer size is >= 512 to
+ * account for FPU state save/restore overhead.
+ */
+#define CRC32C_PCLMUL_BREAKEVEN        512
+
+asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
+
+u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+{
+       size_t num_longs;
+
+       if (!static_branch_likely(&have_crc32))
+               return crc32c_le_base(crc, p, len);
+
+       if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
+           static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
+               kernel_fpu_begin();
+               crc = crc32c_x86_3way(crc, p, len);
+               kernel_fpu_end();
+               return crc;
+       }
+
+       for (num_longs = len / sizeof(unsigned long);
+            num_longs != 0; num_longs--, p += sizeof(unsigned long))
+               asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
+
+       for (len %= sizeof(unsigned long); len; len--, p++)
+               asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
+
+       return crc;
+}
+EXPORT_SYMBOL(crc32c_le_arch);
+
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
+{
+       return crc32_be_base(crc, p, len);
+}
+EXPORT_SYMBOL(crc32_be_arch);
+
+static int __init crc32_x86_init(void)
+{
+       if (boot_cpu_has(X86_FEATURE_XMM4_2))
+               static_branch_enable(&have_crc32);
+       if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
+               static_branch_enable(&have_pclmulqdq);
+       return 0;
+}
+arch_initcall(crc32_x86_init);
+
+static void __exit crc32_x86_exit(void)
+{
+}
+module_exit(crc32_x86_exit);
+
+u32 crc32_optimizations(void)
+{
+       u32 optimizations = 0;
+
+       if (static_key_enabled(&have_crc32))
+               optimizations |= CRC32C_OPTIMIZATION;
+       if (static_key_enabled(&have_pclmulqdq))
+               optimizations |= CRC32_LE_OPTIMIZATION;
+       return optimizations;
+}
+EXPORT_SYMBOL(crc32_optimizations);
+
+MODULE_DESCRIPTION("x86-optimized CRC32 functions");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/lib/crc32-pclmul.S b/arch/x86/lib/crc32-pclmul.S
new file mode 100644 (file)
index 0000000..f963778
--- /dev/null
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2012 Xyratex Technology Limited
+ *
+ * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
+ * calculation.
+ * CRC32 polynomial: 0x04c11db7 (BE) / 0xEDB88320 (LE)
+ * PCLMULQDQ is a carry-less multiplication instruction (CLMUL extension,
+ * introduced alongside SSE4.2-era CPUs); the reference can be found at:
+ * http://www.intel.com/products/processor/manuals/
+ * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+ * Volume 2B: Instruction Set Reference, N-Z
+ *
+ * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
+ *           Alexander Boyko <Alexander_Boyko@xyratex.com>
+ */
+
+#include <linux/linkage.h>
+
+
+.section .rodata
+.align 16
+/*
+ * [(x4*128+32 mod P(x) << 32)]' << 1   = 0x154442bd4
+ * #define CONSTANT_R1  0x154442bd4LL
+ *
+ * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
+ * #define CONSTANT_R2  0x1c6e41596LL
+ */
+.Lconstant_R2R1:
+       .octa 0x00000001c6e415960000000154442bd4
+/*
+ * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
+ * #define CONSTANT_R3  0x1751997d0LL
+ *
+ * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
+ * #define CONSTANT_R4  0x0ccaa009eLL
+ */
+.Lconstant_R4R3:
+       .octa 0x00000000ccaa009e00000001751997d0
+/*
+ * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
+ * #define CONSTANT_R5  0x163cd6124LL
+ */
+.Lconstant_R5:
+       .octa 0x00000000000000000000000163cd6124
+.Lconstant_mask32:
+       .octa 0x000000000000000000000000FFFFFFFF
+/*
+ * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
+ *
+ * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
+ * #define CONSTANT_RU  0x1F7011641LL
+ */
+.Lconstant_RUpoly:
+       .octa 0x00000001F701164100000001DB710641
+
+#define CONSTANT %xmm0
+
+#ifdef __x86_64__
+#define CRC     %edi
+#define BUF     %rsi
+#define LEN     %rdx
+#else
+#define CRC     %eax
+#define BUF     %edx
+#define LEN     %ecx
+#endif
+
+
+
+.text
+/**
+ *      Calculate crc32
+ *      CRC - initial crc32
+ *      BUF - buffer (16 bytes aligned)
+ *      LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
+ *      return %eax crc32
+ *      u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
+ */
+
+SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
+       movdqa  (BUF), %xmm1
+       movdqa  0x10(BUF), %xmm2
+       movdqa  0x20(BUF), %xmm3
+       movdqa  0x30(BUF), %xmm4
+       movd    CRC, CONSTANT
+       pxor    CONSTANT, %xmm1
+       sub     $0x40, LEN
+       add     $0x40, BUF
+       cmp     $0x40, LEN
+       jb      .Lless_64
+
+#ifdef __x86_64__
+       movdqa .Lconstant_R2R1(%rip), CONSTANT
+#else
+       movdqa .Lconstant_R2R1, CONSTANT
+#endif
+
+.Lloop_64:/*  64 bytes Full cache line folding */
+       prefetchnta    0x40(BUF)
+       movdqa  %xmm1, %xmm5
+       movdqa  %xmm2, %xmm6
+       movdqa  %xmm3, %xmm7
+#ifdef __x86_64__
+       movdqa  %xmm4, %xmm8
+#endif
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x00, CONSTANT, %xmm2
+       pclmulqdq $0x00, CONSTANT, %xmm3
+#ifdef __x86_64__
+       pclmulqdq $0x00, CONSTANT, %xmm4
+#endif
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pclmulqdq $0x11, CONSTANT, %xmm6
+       pclmulqdq $0x11, CONSTANT, %xmm7
+#ifdef __x86_64__
+       pclmulqdq $0x11, CONSTANT, %xmm8
+#endif
+       pxor    %xmm5, %xmm1
+       pxor    %xmm6, %xmm2
+       pxor    %xmm7, %xmm3
+#ifdef __x86_64__
+       pxor    %xmm8, %xmm4
+#else
+       /* xmm8 unsupported for x32 */
+       movdqa  %xmm4, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm4
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm4
+#endif
+
+       pxor    (BUF), %xmm1
+       pxor    0x10(BUF), %xmm2
+       pxor    0x20(BUF), %xmm3
+       pxor    0x30(BUF), %xmm4
+
+       sub     $0x40, LEN
+       add     $0x40, BUF
+       cmp     $0x40, LEN
+       jge     .Lloop_64
+.Lless_64:/*  Folding cache line into 128bit */
+#ifdef __x86_64__
+       movdqa  .Lconstant_R4R3(%rip), CONSTANT
+#else
+       movdqa  .Lconstant_R4R3, CONSTANT
+#endif
+       prefetchnta     (BUF)
+
+       movdqa  %xmm1, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    %xmm2, %xmm1
+
+       movdqa  %xmm1, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    %xmm3, %xmm1
+
+       movdqa  %xmm1, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    %xmm4, %xmm1
+
+       cmp     $0x10, LEN
+       jb      .Lfold_64
+.Lloop_16:/* Folding rest buffer into 128bit */
+       movdqa  %xmm1, %xmm5
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pclmulqdq $0x11, CONSTANT, %xmm5
+       pxor    %xmm5, %xmm1
+       pxor    (BUF), %xmm1
+       sub     $0x10, LEN
+       add     $0x10, BUF
+       cmp     $0x10, LEN
+       jge     .Lloop_16
+
+.Lfold_64:
+       /* perform the last 64 bit fold, also adds 32 zeroes
+        * to the input stream */
+       pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
+       psrldq  $0x08, %xmm1
+       pxor    CONSTANT, %xmm1
+
+       /* final 32-bit fold */
+       movdqa  %xmm1, %xmm2
+#ifdef __x86_64__
+       movdqa  .Lconstant_R5(%rip), CONSTANT
+       movdqa  .Lconstant_mask32(%rip), %xmm3
+#else
+       movdqa  .Lconstant_R5, CONSTANT
+       movdqa  .Lconstant_mask32, %xmm3
+#endif
+       psrldq  $0x04, %xmm2
+       pand    %xmm3, %xmm1
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pxor    %xmm2, %xmm1
+
+       /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
+#ifdef __x86_64__
+       movdqa  .Lconstant_RUpoly(%rip), CONSTANT
+#else
+       movdqa  .Lconstant_RUpoly, CONSTANT
+#endif
+       movdqa  %xmm1, %xmm2
+       pand    %xmm3, %xmm1
+       pclmulqdq $0x10, CONSTANT, %xmm1
+       pand    %xmm3, %xmm1
+       pclmulqdq $0x00, CONSTANT, %xmm1
+       pxor    %xmm2, %xmm1
+       pextrd  $0x01, %xmm1, %eax
+
+       RET
+SYM_FUNC_END(crc32_pclmul_le_16)
diff --git a/arch/x86/lib/crc32c-3way.S b/arch/x86/lib/crc32c-3way.S
new file mode 100644 (file)
index 0000000..9b87705
--- /dev/null
@@ -0,0 +1,360 @@
+/*
+ * Implement fast CRC32C with PCLMULQDQ instructions. (x86_64)
+ *
+ * The white papers on CRC32C calculations with PCLMULQDQ instruction can be
+ * downloaded from:
+ * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/crc-iscsi-polynomial-crc32-instruction-paper.pdf
+ * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-paper.pdf
+ *
+ * Copyright (C) 2012 Intel Corporation.
+ * Copyright 2024 Google LLC
+ *
+ * Authors:
+ *     Wajdi Feghali <wajdi.k.feghali@intel.com>
+ *     James Guilford <james.guilford@intel.com>
+ *     David Cote <david.m.cote@intel.com>
+ *     Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/linkage.h>
+
+## ISCSI CRC 32 Implementation with crc32 and pclmulqdq Instruction
+
+# Define threshold below which buffers are considered "small" and routed to
+# regular CRC code that does not interleave the CRC instructions.
+#define SMALL_SIZE 200
+
+# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
+
+.text
+SYM_FUNC_START(crc32c_x86_3way)
+#define    crc0                  %edi
+#define    crc0_q        %rdi
+#define    bufp                  %rsi
+#define    bufp_d        %esi
+#define    len           %rdx
+#define    len_dw        %edx
+#define    n_misaligned          %ecx /* overlaps chunk_bytes! */
+#define    n_misaligned_q %rcx
+#define    chunk_bytes   %ecx /* overlaps n_misaligned! */
+#define    chunk_bytes_q  %rcx
+#define    crc1                  %r8
+#define    crc2                  %r9
+
+       cmp     $SMALL_SIZE, len
+       jb      .Lsmall
+
+       ################################################################
+       ## 1) ALIGN:
+       ################################################################
+       mov     bufp_d, n_misaligned
+       neg     n_misaligned
+       and     $7, n_misaligned        # calculate the misalignment amount of
+                                       # the address
+       je      .Laligned               # Skip if aligned
+
+       # Process 1 <= n_misaligned <= 7 bytes individually in order to align
+       # the remaining data to an 8-byte boundary.
+.Ldo_align:
+       movq    (bufp), %rax
+       add     n_misaligned_q, bufp
+       sub     n_misaligned_q, len
+.Lalign_loop:
+       crc32b  %al, crc0               # compute crc32 of 1-byte
+       shr     $8, %rax                # get next byte
+       dec     n_misaligned
+       jne     .Lalign_loop
+.Laligned:
+
+       ################################################################
+       ## 2) PROCESS BLOCK:
+       ################################################################
+
+       cmp     $128*24, len
+       jae     .Lfull_block
+
+.Lpartial_block:
+       # Compute floor(len / 24) to get num qwords to process from each lane.
+       imul    $2731, len_dw, %eax     # 2731 = ceil(2^16 / 24)
+       shr     $16, %eax
+       jmp     .Lcrc_3lanes
+
+.Lfull_block:
+       # Processing 128 qwords from each lane.
+       mov     $128, %eax
+
+       ################################################################
+       ## 3) CRC each of three lanes:
+       ################################################################
+
+.Lcrc_3lanes:
+       xor     crc1,crc1
+       xor     crc2,crc2
+       mov     %eax, chunk_bytes
+       shl     $3, chunk_bytes         # num bytes to process from each lane
+       sub     $5, %eax                # 4 for 4x_loop, 1 for special last iter
+       jl      .Lcrc_3lanes_4x_done
+
+       # Unroll the loop by a factor of 4 to reduce the overhead of the loop
+       # bookkeeping instructions, which can compete with crc32q for the ALUs.
+.Lcrc_3lanes_4x_loop:
+       crc32q  (bufp), crc0_q
+       crc32q  (bufp,chunk_bytes_q), crc1
+       crc32q  (bufp,chunk_bytes_q,2), crc2
+       crc32q  8(bufp), crc0_q
+       crc32q  8(bufp,chunk_bytes_q), crc1
+       crc32q  8(bufp,chunk_bytes_q,2), crc2
+       crc32q  16(bufp), crc0_q
+       crc32q  16(bufp,chunk_bytes_q), crc1
+       crc32q  16(bufp,chunk_bytes_q,2), crc2
+       crc32q  24(bufp), crc0_q
+       crc32q  24(bufp,chunk_bytes_q), crc1
+       crc32q  24(bufp,chunk_bytes_q,2), crc2
+       add     $32, bufp
+       sub     $4, %eax
+       jge     .Lcrc_3lanes_4x_loop
+
+.Lcrc_3lanes_4x_done:
+       add     $4, %eax
+       jz      .Lcrc_3lanes_last_qword
+
+.Lcrc_3lanes_1x_loop:
+       crc32q  (bufp), crc0_q
+       crc32q  (bufp,chunk_bytes_q), crc1
+       crc32q  (bufp,chunk_bytes_q,2), crc2
+       add     $8, bufp
+       dec     %eax
+       jnz     .Lcrc_3lanes_1x_loop
+
+.Lcrc_3lanes_last_qword:
+       crc32q  (bufp), crc0_q
+       crc32q  (bufp,chunk_bytes_q), crc1
+# SKIP  crc32q (bufp,chunk_bytes_q,2), crc2    ; Don't do this one yet
+
+       ################################################################
+       ## 4) Combine three results:
+       ################################################################
+
+       lea     (K_table-8)(%rip), %rax         # first entry is for idx 1
+       pmovzxdq (%rax,chunk_bytes_q), %xmm0    # 2 consts: K1:K2
+       lea     (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3
+       sub     %rax, len                       # len -= chunk_bytes * 3
+
+       movq    crc0_q, %xmm1                   # CRC for block 1
+       pclmulqdq $0x00, %xmm0, %xmm1           # Multiply by K2
+
+       movq    crc1, %xmm2                     # CRC for block 2
+       pclmulqdq $0x10, %xmm0, %xmm2           # Multiply by K1
+
+       pxor    %xmm2,%xmm1
+       movq    %xmm1, %rax
+       xor     (bufp,chunk_bytes_q,2), %rax
+       mov     crc2, crc0_q
+       crc32   %rax, crc0_q
+       lea     8(bufp,chunk_bytes_q,2), bufp
+
+       ################################################################
+       ## 5) If more blocks remain, goto (2):
+       ################################################################
+
+       cmp     $128*24, len
+       jae     .Lfull_block
+       cmp     $SMALL_SIZE, len
+       jae     .Lpartial_block
+
+       #######################################################################
+       ## 6) Process any remainder without interleaving:
+       #######################################################################
+.Lsmall:
+       test    len_dw, len_dw
+       jz      .Ldone
+       mov     len_dw, %eax
+       shr     $3, %eax
+       jz      .Ldo_dword
+.Ldo_qwords:
+       crc32q  (bufp), crc0_q
+       add     $8, bufp
+       dec     %eax
+       jnz     .Ldo_qwords
+.Ldo_dword:
+       test    $4, len_dw
+       jz      .Ldo_word
+       crc32l  (bufp), crc0
+       add     $4, bufp
+.Ldo_word:
+       test    $2, len_dw
+       jz      .Ldo_byte
+       crc32w  (bufp), crc0
+       add     $2, bufp
+.Ldo_byte:
+       test    $1, len_dw
+       jz      .Ldone
+       crc32b  (bufp), crc0
+.Ldone:
+       mov     crc0, %eax
+        RET
+SYM_FUNC_END(crc32c_x86_3way)
+
+.section       .rodata, "a", @progbits
+       ################################################################
+       ## PCLMULQDQ tables
+       ## Table is 128 entries x 2 words (8 bytes) each
+       ################################################################
+.align 8
+K_table:
+       .long 0x493c7d27, 0x00000001
+       .long 0xba4fc28e, 0x493c7d27
+       .long 0xddc0152b, 0xf20c0dfe
+       .long 0x9e4addf8, 0xba4fc28e
+       .long 0x39d3b296, 0x3da6d0cb
+       .long 0x0715ce53, 0xddc0152b
+       .long 0x47db8317, 0x1c291d04
+       .long 0x0d3b6092, 0x9e4addf8
+       .long 0xc96cfdc0, 0x740eef02
+       .long 0x878a92a7, 0x39d3b296
+       .long 0xdaece73e, 0x083a6eec
+       .long 0xab7aff2a, 0x0715ce53
+       .long 0x2162d385, 0xc49f4f67
+       .long 0x83348832, 0x47db8317
+       .long 0x299847d5, 0x2ad91c30
+       .long 0xb9e02b86, 0x0d3b6092
+       .long 0x18b33a4e, 0x6992cea2
+       .long 0xb6dd949b, 0xc96cfdc0
+       .long 0x78d9ccb7, 0x7e908048
+       .long 0xbac2fd7b, 0x878a92a7
+       .long 0xa60ce07b, 0x1b3d8f29
+       .long 0xce7f39f4, 0xdaece73e
+       .long 0x61d82e56, 0xf1d0f55e
+       .long 0xd270f1a2, 0xab7aff2a
+       .long 0xc619809d, 0xa87ab8a8
+       .long 0x2b3cac5d, 0x2162d385
+       .long 0x65863b64, 0x8462d800
+       .long 0x1b03397f, 0x83348832
+       .long 0xebb883bd, 0x71d111a8
+       .long 0xb3e32c28, 0x299847d5
+       .long 0x064f7f26, 0xffd852c6
+       .long 0xdd7e3b0c, 0xb9e02b86
+       .long 0xf285651c, 0xdcb17aa4
+       .long 0x10746f3c, 0x18b33a4e
+       .long 0xc7a68855, 0xf37c5aee
+       .long 0x271d9844, 0xb6dd949b
+       .long 0x8e766a0c, 0x6051d5a2
+       .long 0x93a5f730, 0x78d9ccb7
+       .long 0x6cb08e5c, 0x18b0d4ff
+       .long 0x6b749fb2, 0xbac2fd7b
+       .long 0x1393e203, 0x21f3d99c
+       .long 0xcec3662e, 0xa60ce07b
+       .long 0x96c515bb, 0x8f158014
+       .long 0xe6fc4e6a, 0xce7f39f4
+       .long 0x8227bb8a, 0xa00457f7
+       .long 0xb0cd4768, 0x61d82e56
+       .long 0x39c7ff35, 0x8d6d2c43
+       .long 0xd7a4825c, 0xd270f1a2
+       .long 0x0ab3844b, 0x00ac29cf
+       .long 0x0167d312, 0xc619809d
+       .long 0xf6076544, 0xe9adf796
+       .long 0x26f6a60a, 0x2b3cac5d
+       .long 0xa741c1bf, 0x96638b34
+       .long 0x98d8d9cb, 0x65863b64
+       .long 0x49c3cc9c, 0xe0e9f351
+       .long 0x68bce87a, 0x1b03397f
+       .long 0x57a3d037, 0x9af01f2d
+       .long 0x6956fc3b, 0xebb883bd
+       .long 0x42d98888, 0x2cff42cf
+       .long 0x3771e98f, 0xb3e32c28
+       .long 0xb42ae3d9, 0x88f25a3a
+       .long 0x2178513a, 0x064f7f26
+       .long 0xe0ac139e, 0x4e36f0b0
+       .long 0x170076fa, 0xdd7e3b0c
+       .long 0x444dd413, 0xbd6f81f8
+       .long 0x6f345e45, 0xf285651c
+       .long 0x41d17b64, 0x91c9bd4b
+       .long 0xff0dba97, 0x10746f3c
+       .long 0xa2b73df1, 0x885f087b
+       .long 0xf872e54c, 0xc7a68855
+       .long 0x1e41e9fc, 0x4c144932
+       .long 0x86d8e4d2, 0x271d9844
+       .long 0x651bd98b, 0x52148f02
+       .long 0x5bb8f1bc, 0x8e766a0c
+       .long 0xa90fd27a, 0xa3c6f37a
+       .long 0xb3af077a, 0x93a5f730
+       .long 0x4984d782, 0xd7c0557f
+       .long 0xca6ef3ac, 0x6cb08e5c
+       .long 0x234e0b26, 0x63ded06a
+       .long 0xdd66cbbb, 0x6b749fb2
+       .long 0x4597456a, 0x4d56973c
+       .long 0xe9e28eb4, 0x1393e203
+       .long 0x7b3ff57a, 0x9669c9df
+       .long 0xc9c8b782, 0xcec3662e
+       .long 0x3f70cc6f, 0xe417f38a
+       .long 0x93e106a4, 0x96c515bb
+       .long 0x62ec6c6d, 0x4b9e0f71
+       .long 0xd813b325, 0xe6fc4e6a
+       .long 0x0df04680, 0xd104b8fc
+       .long 0x2342001e, 0x8227bb8a
+       .long 0x0a2a8d7e, 0x5b397730
+       .long 0x6d9a4957, 0xb0cd4768
+       .long 0xe8b6368b, 0xe78eb416
+       .long 0xd2c3ed1a, 0x39c7ff35
+       .long 0x995a5724, 0x61ff0e01
+       .long 0x9ef68d35, 0xd7a4825c
+       .long 0x0c139b31, 0x8d96551c
+       .long 0xf2271e60, 0x0ab3844b
+       .long 0x0b0bf8ca, 0x0bf80dd2
+       .long 0x2664fd8b, 0x0167d312
+       .long 0xed64812d, 0x8821abed
+       .long 0x02ee03b2, 0xf6076544
+       .long 0x8604ae0f, 0x6a45d2b2
+       .long 0x363bd6b3, 0x26f6a60a
+       .long 0x135c83fd, 0xd8d26619
+       .long 0x5fabe670, 0xa741c1bf
+       .long 0x35ec3279, 0xde87806c
+       .long 0x00bcf5f6, 0x98d8d9cb
+       .long 0x8ae00689, 0x14338754
+       .long 0x17f27698, 0x49c3cc9c
+       .long 0x58ca5f00, 0x5bd2011f
+       .long 0xaa7c7ad5, 0x68bce87a
+       .long 0xb5cfca28, 0xdd07448e
+       .long 0xded288f8, 0x57a3d037
+       .long 0x59f229bc, 0xdde8f5b9
+       .long 0x6d390dec, 0x6956fc3b
+       .long 0x37170390, 0xa3e3e02c
+       .long 0x6353c1cc, 0x42d98888
+       .long 0xc4584f5c, 0xd73c7bea
+       .long 0xf48642e9, 0x3771e98f
+       .long 0x531377e2, 0x80ff0093
+       .long 0xdd35bc8d, 0xb42ae3d9
+       .long 0xb25b29f2, 0x8fe4c34d
+       .long 0x9a5ede41, 0x2178513a
+       .long 0xa563905d, 0xdf99fc11
+       .long 0x45cddf4e, 0xe0ac139e
+       .long 0xacfa3103, 0x6c23e841
+       .long 0xa51b6135, 0x170076fa
index 922b207bc69dca6fe7f6833046b683d151cfbdb5..1c0517a125713a59c9ba1d0c3f1ac424e9db8c3d 100644 (file)
@@ -4,7 +4,6 @@ config ISCSI_TARGET
        depends on INET
        select CRYPTO
        select CRYPTO_CRC32C
-       select CRYPTO_CRC32C_INTEL if X86
        help
        Say M to enable the SCSI target mode stack. A SCSI target mode stack
        is software that makes local storage available over a storage network