crypto: riscv - move library functions to arch/riscv/lib/crypto/
author	Eric Biggers <ebiggers@google.com>
Tue, 22 Apr 2025 15:27:12 +0000 (08:27 -0700)
committer	Herbert Xu <herbert@gondor.apana.org.au>
Mon, 28 Apr 2025 11:40:53 +0000 (19:40 +0800)
Continue disentangling the crypto library functions from the generic
crypto infrastructure by moving the riscv ChaCha library functions into
a new directory arch/riscv/lib/crypto/ that does not depend on CRYPTO.
This mirrors the distinction between crypto/ and lib/crypto/.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/riscv/crypto/Kconfig
arch/riscv/crypto/Makefile
arch/riscv/crypto/chacha-riscv64-glue.c [deleted file]
arch/riscv/crypto/chacha-riscv64-zvkb.S [deleted file]
arch/riscv/lib/Makefile
arch/riscv/lib/crypto/Kconfig [new file with mode: 0644]
arch/riscv/lib/crypto/Makefile [new file with mode: 0644]
arch/riscv/lib/crypto/chacha-riscv64-glue.c [new file with mode: 0644]
arch/riscv/lib/crypto/chacha-riscv64-zvkb.S [new file with mode: 0644]
lib/crypto/Kconfig
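
For context, the functions moved by this patch back the kernel-wide ChaCha library interface declared in <crypto/chacha.h>. The following is a minimal, illustrative sketch of how a caller uses that interface, assuming the existing chacha_init() and chacha_crypt() helpers (which dispatch to the chacha_crypt_arch() implementation moved here when CRYPTO_ARCH_HAVE_LIB_CHACHA is selected); example_chacha20_encrypt() is a hypothetical name and is not part of this patch.

        #include <crypto/chacha.h>
        #include <linux/types.h>

        /* Hypothetical caller: ChaCha20-encrypt len bytes via the library API. */
        static void example_chacha20_encrypt(u8 *dst, const u8 *src, unsigned int len,
                                             const u32 *key, /* 8 words (32 bytes) */
                                             const u8 iv[CHACHA_IV_SIZE])
        {
                u32 state[CHACHA_STATE_WORDS];

                chacha_init(state, key, iv);            /* constants, key, counter + nonce */
                chacha_crypt(state, dst, src, len, 20); /* uses chacha_crypt_arch() when available */
        }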

index 27a1f26d41bdeaabcebac31e16a3486a9080ec2d..4863be2a4ec2fd393666386c74e6f9fde5226d01 100644
@@ -18,13 +18,6 @@ config CRYPTO_AES_RISCV64
          - Zvkb vector crypto extension (CTR)
          - Zvkg vector crypto extension (XTS)
 
-config CRYPTO_CHACHA_RISCV64
-       tristate
-       depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
-       select CRYPTO_ARCH_HAVE_LIB_CHACHA
-       select CRYPTO_LIB_CHACHA_GENERIC
-       default CRYPTO_LIB_CHACHA_INTERNAL
-
 config CRYPTO_GHASH_RISCV64
        tristate "Hash functions: GHASH"
        depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
index 247c7bc7288ceccfd17e34e557b842599ff89a41..4ae9bf762e90715da543aa15850197a88f9b1951 100644
@@ -4,9 +4,6 @@ obj-$(CONFIG_CRYPTO_AES_RISCV64) += aes-riscv64.o
 aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \
                 aes-riscv64-zvkned-zvbb-zvkg.o aes-riscv64-zvkned-zvkb.o
 
-obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
-chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
-
 obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
 ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
 
diff --git a/arch/riscv/crypto/chacha-riscv64-glue.c b/arch/riscv/crypto/chacha-riscv64-glue.c
deleted file mode 100644
index afc4e3b..0000000
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ChaCha stream cipher (RISC-V optimized)
- *
- * Copyright (C) 2023 SiFive, Inc.
- * Author: Jerry Shih <jerry.shih@sifive.com>
- */
-
-#include <asm/simd.h>
-#include <asm/vector.h>
-#include <crypto/chacha.h>
-#include <crypto/internal/simd.h>
-#include <linux/linkage.h>
-#include <linux/module.h>
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb);
-
-asmlinkage void chacha_zvkb(u32 state[16], const u8 *in, u8 *out,
-                           size_t nblocks, int nrounds);
-
-void hchacha_block_arch(const u32 *state, u32 *out, int nrounds)
-{
-       hchacha_block_generic(state, out, nrounds);
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
-                      int nrounds)
-{
-       u8 block_buffer[CHACHA_BLOCK_SIZE];
-       unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE;
-       unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE;
-
-       if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable())
-               return chacha_crypt_generic(state, dst, src, bytes, nrounds);
-
-       kernel_vector_begin();
-       if (full_blocks) {
-               chacha_zvkb(state, src, dst, full_blocks, nrounds);
-               src += full_blocks * CHACHA_BLOCK_SIZE;
-               dst += full_blocks * CHACHA_BLOCK_SIZE;
-       }
-       if (tail_bytes) {
-               memcpy(block_buffer, src, tail_bytes);
-               chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds);
-               memcpy(dst, block_buffer, tail_bytes);
-       }
-       kernel_vector_end();
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-bool chacha_is_arch_optimized(void)
-{
-       return static_key_enabled(&use_zvkb);
-}
-EXPORT_SYMBOL(chacha_is_arch_optimized);
-
-static int __init riscv64_chacha_mod_init(void)
-{
-       if (riscv_isa_extension_available(NULL, ZVKB) &&
-           riscv_vector_vlen() >= 128)
-               static_branch_enable(&use_zvkb);
-       return 0;
-}
-arch_initcall(riscv64_chacha_mod_init);
-
-static void __exit riscv64_chacha_mod_exit(void)
-{
-}
-module_exit(riscv64_chacha_mod_exit);
-
-MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)");
-MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
-MODULE_LICENSE("GPL");
diff --git a/arch/riscv/crypto/chacha-riscv64-zvkb.S b/arch/riscv/crypto/chacha-riscv64-zvkb.S
deleted file mode 100644
index ab4423b..0000000
+++ /dev/null
@@ -1,297 +0,0 @@
-/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
-//
-// This file is dual-licensed, meaning that you can use it under your
-// choice of either of the following two licenses:
-//
-// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the Apache License 2.0 (the "License"). You can obtain
-// a copy in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-//
-// or
-//
-// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
-// Copyright 2024 Google LLC
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-// 1. Redistributions of source code must retain the above copyright
-//    notice, this list of conditions and the following disclaimer.
-// 2. Redistributions in binary form must reproduce the above copyright
-//    notice, this list of conditions and the following disclaimer in the
-//    documentation and/or other materials provided with the distribution.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// The generated code of this file depends on the following RISC-V extensions:
-// - RV64I
-// - RISC-V Vector ('V') with VLEN >= 128
-// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
-
-#include <linux/linkage.h>
-
-.text
-.option arch, +zvkb
-
-#define STATEP         a0
-#define INP            a1
-#define OUTP           a2
-#define NBLOCKS                a3
-#define NROUNDS                a4
-
-#define CONSTS0                a5
-#define CONSTS1                a6
-#define CONSTS2                a7
-#define CONSTS3                t0
-#define TMP            t1
-#define VL             t2
-#define STRIDE         t3
-#define ROUND_CTR      t4
-#define KEY0           s0
-#define KEY1           s1
-#define KEY2           s2
-#define KEY3           s3
-#define KEY4           s4
-#define KEY5           s5
-#define KEY6           s6
-#define KEY7           s7
-#define COUNTER                s8
-#define NONCE0         s9
-#define NONCE1         s10
-#define NONCE2         s11
-
-.macro chacha_round    a0, b0, c0, d0,  a1, b1, c1, d1, \
-                       a2, b2, c2, d2,  a3, b3, c3, d3
-       // a += b; d ^= a; d = rol(d, 16);
-       vadd.vv         \a0, \a0, \b0
-       vadd.vv         \a1, \a1, \b1
-       vadd.vv         \a2, \a2, \b2
-       vadd.vv         \a3, \a3, \b3
-       vxor.vv         \d0, \d0, \a0
-       vxor.vv         \d1, \d1, \a1
-       vxor.vv         \d2, \d2, \a2
-       vxor.vv         \d3, \d3, \a3
-       vror.vi         \d0, \d0, 32 - 16
-       vror.vi         \d1, \d1, 32 - 16
-       vror.vi         \d2, \d2, 32 - 16
-       vror.vi         \d3, \d3, 32 - 16
-
-       // c += d; b ^= c; b = rol(b, 12);
-       vadd.vv         \c0, \c0, \d0
-       vadd.vv         \c1, \c1, \d1
-       vadd.vv         \c2, \c2, \d2
-       vadd.vv         \c3, \c3, \d3
-       vxor.vv         \b0, \b0, \c0
-       vxor.vv         \b1, \b1, \c1
-       vxor.vv         \b2, \b2, \c2
-       vxor.vv         \b3, \b3, \c3
-       vror.vi         \b0, \b0, 32 - 12
-       vror.vi         \b1, \b1, 32 - 12
-       vror.vi         \b2, \b2, 32 - 12
-       vror.vi         \b3, \b3, 32 - 12
-
-       // a += b; d ^= a; d = rol(d, 8);
-       vadd.vv         \a0, \a0, \b0
-       vadd.vv         \a1, \a1, \b1
-       vadd.vv         \a2, \a2, \b2
-       vadd.vv         \a3, \a3, \b3
-       vxor.vv         \d0, \d0, \a0
-       vxor.vv         \d1, \d1, \a1
-       vxor.vv         \d2, \d2, \a2
-       vxor.vv         \d3, \d3, \a3
-       vror.vi         \d0, \d0, 32 - 8
-       vror.vi         \d1, \d1, 32 - 8
-       vror.vi         \d2, \d2, 32 - 8
-       vror.vi         \d3, \d3, 32 - 8
-
-       // c += d; b ^= c; b = rol(b, 7);
-       vadd.vv         \c0, \c0, \d0
-       vadd.vv         \c1, \c1, \d1
-       vadd.vv         \c2, \c2, \d2
-       vadd.vv         \c3, \c3, \d3
-       vxor.vv         \b0, \b0, \c0
-       vxor.vv         \b1, \b1, \c1
-       vxor.vv         \b2, \b2, \c2
-       vxor.vv         \b3, \b3, \c3
-       vror.vi         \b0, \b0, 32 - 7
-       vror.vi         \b1, \b1, 32 - 7
-       vror.vi         \b2, \b2, 32 - 7
-       vror.vi         \b3, \b3, 32 - 7
-.endm
-
-// void chacha_zvkb(u32 state[16], const u8 *in, u8 *out, size_t nblocks,
-//                 int nrounds);
-//
-// |nblocks| is the number of 64-byte blocks to process, and must be nonzero.
-//
-// |state| gives the ChaCha state matrix, including the 32-bit counter in
-// state[12] following the RFC7539 convention; note that this differs from the
-// original Salsa20 paper which uses a 64-bit counter in state[12..13].  The
-// updated 32-bit counter is written back to state[12] before returning.
-SYM_FUNC_START(chacha_zvkb)
-       addi            sp, sp, -96
-       sd              s0, 0(sp)
-       sd              s1, 8(sp)
-       sd              s2, 16(sp)
-       sd              s3, 24(sp)
-       sd              s4, 32(sp)
-       sd              s5, 40(sp)
-       sd              s6, 48(sp)
-       sd              s7, 56(sp)
-       sd              s8, 64(sp)
-       sd              s9, 72(sp)
-       sd              s10, 80(sp)
-       sd              s11, 88(sp)
-
-       li              STRIDE, 64
-
-       // Set up the initial state matrix in scalar registers.
-       lw              CONSTS0, 0(STATEP)
-       lw              CONSTS1, 4(STATEP)
-       lw              CONSTS2, 8(STATEP)
-       lw              CONSTS3, 12(STATEP)
-       lw              KEY0, 16(STATEP)
-       lw              KEY1, 20(STATEP)
-       lw              KEY2, 24(STATEP)
-       lw              KEY3, 28(STATEP)
-       lw              KEY4, 32(STATEP)
-       lw              KEY5, 36(STATEP)
-       lw              KEY6, 40(STATEP)
-       lw              KEY7, 44(STATEP)
-       lw              COUNTER, 48(STATEP)
-       lw              NONCE0, 52(STATEP)
-       lw              NONCE1, 56(STATEP)
-       lw              NONCE2, 60(STATEP)
-
-.Lblock_loop:
-       // Set vl to the number of blocks to process in this iteration.
-       vsetvli         VL, NBLOCKS, e32, m1, ta, ma
-
-       // Set up the initial state matrix for the next VL blocks in v0-v15.
-       // v{i} holds the i'th 32-bit word of the state matrix for all blocks.
-       // Note that only the counter word, at index 12, differs across blocks.
-       vmv.v.x         v0, CONSTS0
-       vmv.v.x         v1, CONSTS1
-       vmv.v.x         v2, CONSTS2
-       vmv.v.x         v3, CONSTS3
-       vmv.v.x         v4, KEY0
-       vmv.v.x         v5, KEY1
-       vmv.v.x         v6, KEY2
-       vmv.v.x         v7, KEY3
-       vmv.v.x         v8, KEY4
-       vmv.v.x         v9, KEY5
-       vmv.v.x         v10, KEY6
-       vmv.v.x         v11, KEY7
-       vid.v           v12
-       vadd.vx         v12, v12, COUNTER
-       vmv.v.x         v13, NONCE0
-       vmv.v.x         v14, NONCE1
-       vmv.v.x         v15, NONCE2
-
-       // Load the first half of the input data for each block into v16-v23.
-       // v{16+i} holds the i'th 32-bit word for all blocks.
-       vlsseg8e32.v    v16, (INP), STRIDE
-
-       mv              ROUND_CTR, NROUNDS
-.Lnext_doubleround:
-       addi            ROUND_CTR, ROUND_CTR, -2
-       // column round
-       chacha_round    v0, v4, v8, v12, v1, v5, v9, v13, \
-                       v2, v6, v10, v14, v3, v7, v11, v15
-       // diagonal round
-       chacha_round    v0, v5, v10, v15, v1, v6, v11, v12, \
-                       v2, v7, v8, v13, v3, v4, v9, v14
-       bnez            ROUND_CTR, .Lnext_doubleround
-
-       // Load the second half of the input data for each block into v24-v31.
-       // v{24+i} holds the {8+i}'th 32-bit word for all blocks.
-       addi            TMP, INP, 32
-       vlsseg8e32.v    v24, (TMP), STRIDE
-
-       // Finalize the first half of the keystream for each block.
-       vadd.vx         v0, v0, CONSTS0
-       vadd.vx         v1, v1, CONSTS1
-       vadd.vx         v2, v2, CONSTS2
-       vadd.vx         v3, v3, CONSTS3
-       vadd.vx         v4, v4, KEY0
-       vadd.vx         v5, v5, KEY1
-       vadd.vx         v6, v6, KEY2
-       vadd.vx         v7, v7, KEY3
-
-       // Encrypt/decrypt the first half of the data for each block.
-       vxor.vv         v16, v16, v0
-       vxor.vv         v17, v17, v1
-       vxor.vv         v18, v18, v2
-       vxor.vv         v19, v19, v3
-       vxor.vv         v20, v20, v4
-       vxor.vv         v21, v21, v5
-       vxor.vv         v22, v22, v6
-       vxor.vv         v23, v23, v7
-
-       // Store the first half of the output data for each block.
-       vssseg8e32.v    v16, (OUTP), STRIDE
-
-       // Finalize the second half of the keystream for each block.
-       vadd.vx         v8, v8, KEY4
-       vadd.vx         v9, v9, KEY5
-       vadd.vx         v10, v10, KEY6
-       vadd.vx         v11, v11, KEY7
-       vid.v           v0
-       vadd.vx         v12, v12, COUNTER
-       vadd.vx         v13, v13, NONCE0
-       vadd.vx         v14, v14, NONCE1
-       vadd.vx         v15, v15, NONCE2
-       vadd.vv         v12, v12, v0
-
-       // Encrypt/decrypt the second half of the data for each block.
-       vxor.vv         v24, v24, v8
-       vxor.vv         v25, v25, v9
-       vxor.vv         v26, v26, v10
-       vxor.vv         v27, v27, v11
-       vxor.vv         v29, v29, v13
-       vxor.vv         v28, v28, v12
-       vxor.vv         v30, v30, v14
-       vxor.vv         v31, v31, v15
-
-       // Store the second half of the output data for each block.
-       addi            TMP, OUTP, 32
-       vssseg8e32.v    v24, (TMP), STRIDE
-
-       // Update the counter, the remaining number of blocks, and the input and
-       // output pointers according to the number of blocks processed (VL).
-       add             COUNTER, COUNTER, VL
-       sub             NBLOCKS, NBLOCKS, VL
-       slli            TMP, VL, 6
-       add             OUTP, OUTP, TMP
-       add             INP, INP, TMP
-       bnez            NBLOCKS, .Lblock_loop
-
-       sw              COUNTER, 48(STATEP)
-       ld              s0, 0(sp)
-       ld              s1, 8(sp)
-       ld              s2, 16(sp)
-       ld              s3, 24(sp)
-       ld              s4, 32(sp)
-       ld              s5, 40(sp)
-       ld              s6, 48(sp)
-       ld              s7, 56(sp)
-       ld              s8, 64(sp)
-       ld              s9, 72(sp)
-       ld              s10, 80(sp)
-       ld              s11, 88(sp)
-       addi            sp, sp, 96
-       ret
-SYM_FUNC_END(chacha_zvkb)
index b1c46153606a67d7790848ddc8380b7c7752b9e5..0baec92d2f55b8320281bded76442f9a31581622 100644
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+obj-y                  += crypto/
 lib-y                  += delay.o
 lib-y                  += memcpy.o
 lib-y                  += memset.o
diff --git a/arch/riscv/lib/crypto/Kconfig b/arch/riscv/lib/crypto/Kconfig
new file mode 100644
index 0000000..46ce2a7
--- /dev/null
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA_RISCV64
+       tristate
+       depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+       default CRYPTO_LIB_CHACHA_INTERNAL
+       select CRYPTO_ARCH_HAVE_LIB_CHACHA
+       select CRYPTO_LIB_CHACHA_GENERIC
diff --git a/arch/riscv/lib/crypto/Makefile b/arch/riscv/lib/crypto/Makefile
new file mode 100644
index 0000000..e27b78f
--- /dev/null
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
+chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
diff --git a/arch/riscv/lib/crypto/chacha-riscv64-glue.c b/arch/riscv/lib/crypto/chacha-riscv64-glue.c
new file mode 100644
index 0000000..afc4e3b
--- /dev/null
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ChaCha stream cipher (RISC-V optimized)
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb);
+
+asmlinkage void chacha_zvkb(u32 state[16], const u8 *in, u8 *out,
+                           size_t nblocks, int nrounds);
+
+void hchacha_block_arch(const u32 *state, u32 *out, int nrounds)
+{
+       hchacha_block_generic(state, out, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+                      int nrounds)
+{
+       u8 block_buffer[CHACHA_BLOCK_SIZE];
+       unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE;
+       unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE;
+
+       if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable())
+               return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+       kernel_vector_begin();
+       if (full_blocks) {
+               chacha_zvkb(state, src, dst, full_blocks, nrounds);
+               src += full_blocks * CHACHA_BLOCK_SIZE;
+               dst += full_blocks * CHACHA_BLOCK_SIZE;
+       }
+       if (tail_bytes) {
+               memcpy(block_buffer, src, tail_bytes);
+               chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds);
+               memcpy(dst, block_buffer, tail_bytes);
+       }
+       kernel_vector_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+       return static_key_enabled(&use_zvkb);
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+static int __init riscv64_chacha_mod_init(void)
+{
+       if (riscv_isa_extension_available(NULL, ZVKB) &&
+           riscv_vector_vlen() >= 128)
+               static_branch_enable(&use_zvkb);
+       return 0;
+}
+arch_initcall(riscv64_chacha_mod_init);
+
+static void __exit riscv64_chacha_mod_exit(void)
+{
+}
+module_exit(riscv64_chacha_mod_exit);
+
+MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)");
+MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
+MODULE_LICENSE("GPL");
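
Note that hchacha_block_arch() in the glue code above simply forwards to the generic implementation; callers reach it through the hchacha_block() wrapper in <crypto/chacha.h>, which XChaCha constructions use to derive a sub-key. Below is a rough sketch of that use, loosely following the pattern in lib/crypto's XChaCha20-Poly1305 code; example_xchacha20_init() is a hypothetical name and is not part of this patch.

        #include <crypto/chacha.h>
        #include <linux/string.h>

        /* Hypothetical: set up a ChaCha20 state for a 24-byte XChaCha20 nonce. */
        static void example_xchacha20_init(u32 state[CHACHA_STATE_WORDS],
                                           const u32 *key, const u8 nonce[24])
        {
                u32 subkey[CHACHA_KEY_SIZE / sizeof(u32)];
                u8 iv[CHACHA_IV_SIZE] = {};

                /* HChaCha20 over the key and the first 16 nonce bytes. */
                chacha_init(state, key, nonce);
                hchacha_block(state, subkey, 20);   /* ends up in hchacha_block_arch() */

                /* Restart with the derived sub-key: counter 0, nonce tail in iv[8..15]. */
                memcpy(iv + 8, nonce + 16, 8);
                chacha_init(state, subkey, iv);
        }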
diff --git a/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S b/arch/riscv/lib/crypto/chacha-riscv64-zvkb.S
new file mode 100644
index 0000000..ab4423b
--- /dev/null
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkb
+
+#define STATEP         a0
+#define INP            a1
+#define OUTP           a2
+#define NBLOCKS                a3
+#define NROUNDS                a4
+
+#define CONSTS0                a5
+#define CONSTS1                a6
+#define CONSTS2                a7
+#define CONSTS3                t0
+#define TMP            t1
+#define VL             t2
+#define STRIDE         t3
+#define ROUND_CTR      t4
+#define KEY0           s0
+#define KEY1           s1
+#define KEY2           s2
+#define KEY3           s3
+#define KEY4           s4
+#define KEY5           s5
+#define KEY6           s6
+#define KEY7           s7
+#define COUNTER                s8
+#define NONCE0         s9
+#define NONCE1         s10
+#define NONCE2         s11
+
+.macro chacha_round    a0, b0, c0, d0,  a1, b1, c1, d1, \
+                       a2, b2, c2, d2,  a3, b3, c3, d3
+       // a += b; d ^= a; d = rol(d, 16);
+       vadd.vv         \a0, \a0, \b0
+       vadd.vv         \a1, \a1, \b1
+       vadd.vv         \a2, \a2, \b2
+       vadd.vv         \a3, \a3, \b3
+       vxor.vv         \d0, \d0, \a0
+       vxor.vv         \d1, \d1, \a1
+       vxor.vv         \d2, \d2, \a2
+       vxor.vv         \d3, \d3, \a3
+       vror.vi         \d0, \d0, 32 - 16
+       vror.vi         \d1, \d1, 32 - 16
+       vror.vi         \d2, \d2, 32 - 16
+       vror.vi         \d3, \d3, 32 - 16
+
+       // c += d; b ^= c; b = rol(b, 12);
+       vadd.vv         \c0, \c0, \d0
+       vadd.vv         \c1, \c1, \d1
+       vadd.vv         \c2, \c2, \d2
+       vadd.vv         \c3, \c3, \d3
+       vxor.vv         \b0, \b0, \c0
+       vxor.vv         \b1, \b1, \c1
+       vxor.vv         \b2, \b2, \c2
+       vxor.vv         \b3, \b3, \c3
+       vror.vi         \b0, \b0, 32 - 12
+       vror.vi         \b1, \b1, 32 - 12
+       vror.vi         \b2, \b2, 32 - 12
+       vror.vi         \b3, \b3, 32 - 12
+
+       // a += b; d ^= a; d = rol(d, 8);
+       vadd.vv         \a0, \a0, \b0
+       vadd.vv         \a1, \a1, \b1
+       vadd.vv         \a2, \a2, \b2
+       vadd.vv         \a3, \a3, \b3
+       vxor.vv         \d0, \d0, \a0
+       vxor.vv         \d1, \d1, \a1
+       vxor.vv         \d2, \d2, \a2
+       vxor.vv         \d3, \d3, \a3
+       vror.vi         \d0, \d0, 32 - 8
+       vror.vi         \d1, \d1, 32 - 8
+       vror.vi         \d2, \d2, 32 - 8
+       vror.vi         \d3, \d3, 32 - 8
+
+       // c += d; b ^= c; b = rol(b, 7);
+       vadd.vv         \c0, \c0, \d0
+       vadd.vv         \c1, \c1, \d1
+       vadd.vv         \c2, \c2, \d2
+       vadd.vv         \c3, \c3, \d3
+       vxor.vv         \b0, \b0, \c0
+       vxor.vv         \b1, \b1, \c1
+       vxor.vv         \b2, \b2, \c2
+       vxor.vv         \b3, \b3, \c3
+       vror.vi         \b0, \b0, 32 - 7
+       vror.vi         \b1, \b1, 32 - 7
+       vror.vi         \b2, \b2, 32 - 7
+       vror.vi         \b3, \b3, 32 - 7
+.endm
+
+// void chacha_zvkb(u32 state[16], const u8 *in, u8 *out, size_t nblocks,
+//                 int nrounds);
+//
+// |nblocks| is the number of 64-byte blocks to process, and must be nonzero.
+//
+// |state| gives the ChaCha state matrix, including the 32-bit counter in
+// state[12] following the RFC7539 convention; note that this differs from the
+// original Salsa20 paper which uses a 64-bit counter in state[12..13].  The
+// updated 32-bit counter is written back to state[12] before returning.
+SYM_FUNC_START(chacha_zvkb)
+       addi            sp, sp, -96
+       sd              s0, 0(sp)
+       sd              s1, 8(sp)
+       sd              s2, 16(sp)
+       sd              s3, 24(sp)
+       sd              s4, 32(sp)
+       sd              s5, 40(sp)
+       sd              s6, 48(sp)
+       sd              s7, 56(sp)
+       sd              s8, 64(sp)
+       sd              s9, 72(sp)
+       sd              s10, 80(sp)
+       sd              s11, 88(sp)
+
+       li              STRIDE, 64
+
+       // Set up the initial state matrix in scalar registers.
+       lw              CONSTS0, 0(STATEP)
+       lw              CONSTS1, 4(STATEP)
+       lw              CONSTS2, 8(STATEP)
+       lw              CONSTS3, 12(STATEP)
+       lw              KEY0, 16(STATEP)
+       lw              KEY1, 20(STATEP)
+       lw              KEY2, 24(STATEP)
+       lw              KEY3, 28(STATEP)
+       lw              KEY4, 32(STATEP)
+       lw              KEY5, 36(STATEP)
+       lw              KEY6, 40(STATEP)
+       lw              KEY7, 44(STATEP)
+       lw              COUNTER, 48(STATEP)
+       lw              NONCE0, 52(STATEP)
+       lw              NONCE1, 56(STATEP)
+       lw              NONCE2, 60(STATEP)
+
+.Lblock_loop:
+       // Set vl to the number of blocks to process in this iteration.
+       vsetvli         VL, NBLOCKS, e32, m1, ta, ma
+
+       // Set up the initial state matrix for the next VL blocks in v0-v15.
+       // v{i} holds the i'th 32-bit word of the state matrix for all blocks.
+       // Note that only the counter word, at index 12, differs across blocks.
+       vmv.v.x         v0, CONSTS0
+       vmv.v.x         v1, CONSTS1
+       vmv.v.x         v2, CONSTS2
+       vmv.v.x         v3, CONSTS3
+       vmv.v.x         v4, KEY0
+       vmv.v.x         v5, KEY1
+       vmv.v.x         v6, KEY2
+       vmv.v.x         v7, KEY3
+       vmv.v.x         v8, KEY4
+       vmv.v.x         v9, KEY5
+       vmv.v.x         v10, KEY6
+       vmv.v.x         v11, KEY7
+       vid.v           v12
+       vadd.vx         v12, v12, COUNTER
+       vmv.v.x         v13, NONCE0
+       vmv.v.x         v14, NONCE1
+       vmv.v.x         v15, NONCE2
+
+       // Load the first half of the input data for each block into v16-v23.
+       // v{16+i} holds the i'th 32-bit word for all blocks.
+       vlsseg8e32.v    v16, (INP), STRIDE
+
+       mv              ROUND_CTR, NROUNDS
+.Lnext_doubleround:
+       addi            ROUND_CTR, ROUND_CTR, -2
+       // column round
+       chacha_round    v0, v4, v8, v12, v1, v5, v9, v13, \
+                       v2, v6, v10, v14, v3, v7, v11, v15
+       // diagonal round
+       chacha_round    v0, v5, v10, v15, v1, v6, v11, v12, \
+                       v2, v7, v8, v13, v3, v4, v9, v14
+       bnez            ROUND_CTR, .Lnext_doubleround
+
+       // Load the second half of the input data for each block into v24-v31.
+       // v{24+i} holds the {8+i}'th 32-bit word for all blocks.
+       addi            TMP, INP, 32
+       vlsseg8e32.v    v24, (TMP), STRIDE
+
+       // Finalize the first half of the keystream for each block.
+       vadd.vx         v0, v0, CONSTS0
+       vadd.vx         v1, v1, CONSTS1
+       vadd.vx         v2, v2, CONSTS2
+       vadd.vx         v3, v3, CONSTS3
+       vadd.vx         v4, v4, KEY0
+       vadd.vx         v5, v5, KEY1
+       vadd.vx         v6, v6, KEY2
+       vadd.vx         v7, v7, KEY3
+
+       // Encrypt/decrypt the first half of the data for each block.
+       vxor.vv         v16, v16, v0
+       vxor.vv         v17, v17, v1
+       vxor.vv         v18, v18, v2
+       vxor.vv         v19, v19, v3
+       vxor.vv         v20, v20, v4
+       vxor.vv         v21, v21, v5
+       vxor.vv         v22, v22, v6
+       vxor.vv         v23, v23, v7
+
+       // Store the first half of the output data for each block.
+       vssseg8e32.v    v16, (OUTP), STRIDE
+
+       // Finalize the second half of the keystream for each block.
+       vadd.vx         v8, v8, KEY4
+       vadd.vx         v9, v9, KEY5
+       vadd.vx         v10, v10, KEY6
+       vadd.vx         v11, v11, KEY7
+       vid.v           v0
+       vadd.vx         v12, v12, COUNTER
+       vadd.vx         v13, v13, NONCE0
+       vadd.vx         v14, v14, NONCE1
+       vadd.vx         v15, v15, NONCE2
+       vadd.vv         v12, v12, v0
+
+       // Encrypt/decrypt the second half of the data for each block.
+       vxor.vv         v24, v24, v8
+       vxor.vv         v25, v25, v9
+       vxor.vv         v26, v26, v10
+       vxor.vv         v27, v27, v11
+       vxor.vv         v29, v29, v13
+       vxor.vv         v28, v28, v12
+       vxor.vv         v30, v30, v14
+       vxor.vv         v31, v31, v15
+
+       // Store the second half of the output data for each block.
+       addi            TMP, OUTP, 32
+       vssseg8e32.v    v24, (TMP), STRIDE
+
+       // Update the counter, the remaining number of blocks, and the input and
+       // output pointers according to the number of blocks processed (VL).
+       add             COUNTER, COUNTER, VL
+       sub             NBLOCKS, NBLOCKS, VL
+       slli            TMP, VL, 6
+       add             OUTP, OUTP, TMP
+       add             INP, INP, TMP
+       bnez            NBLOCKS, .Lblock_loop
+
+       sw              COUNTER, 48(STATEP)
+       ld              s0, 0(sp)
+       ld              s1, 8(sp)
+       ld              s2, 16(sp)
+       ld              s3, 24(sp)
+       ld              s4, 32(sp)
+       ld              s5, 40(sp)
+       ld              s6, 48(sp)
+       ld              s7, 56(sp)
+       ld              s8, 64(sp)
+       ld              s9, 72(sp)
+       ld              s10, 80(sp)
+       ld              s11, 88(sp)
+       addi            sp, sp, 96
+       ret
+SYM_FUNC_END(chacha_zvkb)
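
For orientation, the chacha_round macro above performs four independent ChaCha quarter-rounds per invocation, with vror.vi by (32 - n) standing in for a rotate-left by n, and its two call sites implement the standard column round and diagonal round. A scalar C model of one double round follows; it is illustrative only and not additional kernel code.

        #include <linux/bitops.h>   /* rol32() */
        #include <linux/types.h>

        /* One ChaCha quarter-round; same rotate amounts as the chacha_round macro. */
        static void example_chacha_qr(u32 *a, u32 *b, u32 *c, u32 *d)
        {
                *a += *b; *d ^= *a; *d = rol32(*d, 16);
                *c += *d; *b ^= *c; *b = rol32(*b, 12);
                *a += *b; *d ^= *a; *d = rol32(*d, 8);
                *c += *d; *b ^= *c; *b = rol32(*b, 7);
        }

        /* One double round over a 16-word state: column round, then diagonal round. */
        static void example_chacha_doubleround(u32 x[16])
        {
                example_chacha_qr(&x[0], &x[4], &x[8],  &x[12]);
                example_chacha_qr(&x[1], &x[5], &x[9],  &x[13]);
                example_chacha_qr(&x[2], &x[6], &x[10], &x[14]);
                example_chacha_qr(&x[3], &x[7], &x[11], &x[15]);

                example_chacha_qr(&x[0], &x[5], &x[10], &x[15]);
                example_chacha_qr(&x[1], &x[6], &x[11], &x[12]);
                example_chacha_qr(&x[2], &x[7], &x[8],  &x[13]);
                example_chacha_qr(&x[3], &x[4], &x[9],  &x[14]);
        }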
index 4b3e94ed84bb65057447b4be90064a520ecd26a7..0b06c25eb38a5a83a8ec745c011a44efaf1a26df 100644
@@ -168,6 +168,9 @@ endif
 if PPC
 source "arch/powerpc/lib/crypto/Kconfig"
 endif
+if RISCV
+source "arch/riscv/lib/crypto/Kconfig"
+endif
 endif
 
 endmenu