- Zvkb vector crypto extension (CTR)
- Zvkg vector crypto extension (XTS)
-config CRYPTO_CHACHA_RISCV64
- tristate
- depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- select CRYPTO_LIB_CHACHA_GENERIC
- default CRYPTO_LIB_CHACHA_INTERNAL
-
config CRYPTO_GHASH_RISCV64
tristate "Hash functions: GHASH"
depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
aes-riscv64-y := aes-riscv64-glue.o aes-riscv64-zvkned.o \
aes-riscv64-zvkned-zvbb-zvkg.o aes-riscv64-zvkned-zvkb.o
-obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
-chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
-
obj-$(CONFIG_CRYPTO_GHASH_RISCV64) += ghash-riscv64.o
ghash-riscv64-y := ghash-riscv64-glue.o ghash-riscv64-zvkg.o
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ChaCha stream cipher (RISC-V optimized)
- *
- * Copyright (C) 2023 SiFive, Inc.
- * Author: Jerry Shih <jerry.shih@sifive.com>
- */
-
-#include <asm/simd.h>
-#include <asm/vector.h>
-#include <crypto/chacha.h>
-#include <crypto/internal/simd.h>
-#include <linux/linkage.h>
-#include <linux/module.h>
-
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb);
-
-asmlinkage void chacha_zvkb(u32 state[16], const u8 *in, u8 *out,
- size_t nblocks, int nrounds);
-
-void hchacha_block_arch(const u32 *state, u32 *out, int nrounds)
-{
- hchacha_block_generic(state, out, nrounds);
-}
-EXPORT_SYMBOL(hchacha_block_arch);
-
-void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
- int nrounds)
-{
- u8 block_buffer[CHACHA_BLOCK_SIZE];
- unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE;
- unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE;
-
- if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable())
- return chacha_crypt_generic(state, dst, src, bytes, nrounds);
-
- kernel_vector_begin();
- if (full_blocks) {
- chacha_zvkb(state, src, dst, full_blocks, nrounds);
- src += full_blocks * CHACHA_BLOCK_SIZE;
- dst += full_blocks * CHACHA_BLOCK_SIZE;
- }
- if (tail_bytes) {
- memcpy(block_buffer, src, tail_bytes);
- chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds);
- memcpy(dst, block_buffer, tail_bytes);
- }
- kernel_vector_end();
-}
-EXPORT_SYMBOL(chacha_crypt_arch);
-
-bool chacha_is_arch_optimized(void)
-{
- return static_key_enabled(&use_zvkb);
-}
-EXPORT_SYMBOL(chacha_is_arch_optimized);
-
-static int __init riscv64_chacha_mod_init(void)
-{
- if (riscv_isa_extension_available(NULL, ZVKB) &&
- riscv_vector_vlen() >= 128)
- static_branch_enable(&use_zvkb);
- return 0;
-}
-arch_initcall(riscv64_chacha_mod_init);
-
-static void __exit riscv64_chacha_mod_exit(void)
-{
-}
-module_exit(riscv64_chacha_mod_exit);
-
-MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)");
-MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
-MODULE_LICENSE("GPL");
+++ /dev/null
-/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
-//
-// This file is dual-licensed, meaning that you can use it under your
-// choice of either of the following two licenses:
-//
-// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the Apache License 2.0 (the "License"). You can obtain
-// a copy in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-//
-// or
-//
-// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
-// Copyright 2024 Google LLC
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions
-// are met:
-// 1. Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// 2. Redistributions in binary form must reproduce the above copyright
-// notice, this list of conditions and the following disclaimer in the
-// documentation and/or other materials provided with the distribution.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-// The generated code of this file depends on the following RISC-V extensions:
-// - RV64I
-// - RISC-V Vector ('V') with VLEN >= 128
-// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
-
-#include <linux/linkage.h>
-
-.text
-.option arch, +zvkb
-
-#define STATEP a0
-#define INP a1
-#define OUTP a2
-#define NBLOCKS a3
-#define NROUNDS a4
-
-#define CONSTS0 a5
-#define CONSTS1 a6
-#define CONSTS2 a7
-#define CONSTS3 t0
-#define TMP t1
-#define VL t2
-#define STRIDE t3
-#define ROUND_CTR t4
-#define KEY0 s0
-#define KEY1 s1
-#define KEY2 s2
-#define KEY3 s3
-#define KEY4 s4
-#define KEY5 s5
-#define KEY6 s6
-#define KEY7 s7
-#define COUNTER s8
-#define NONCE0 s9
-#define NONCE1 s10
-#define NONCE2 s11
-
-.macro chacha_round a0, b0, c0, d0, a1, b1, c1, d1, \
- a2, b2, c2, d2, a3, b3, c3, d3
- // a += b; d ^= a; d = rol(d, 16);
- vadd.vv \a0, \a0, \b0
- vadd.vv \a1, \a1, \b1
- vadd.vv \a2, \a2, \b2
- vadd.vv \a3, \a3, \b3
- vxor.vv \d0, \d0, \a0
- vxor.vv \d1, \d1, \a1
- vxor.vv \d2, \d2, \a2
- vxor.vv \d3, \d3, \a3
- vror.vi \d0, \d0, 32 - 16
- vror.vi \d1, \d1, 32 - 16
- vror.vi \d2, \d2, 32 - 16
- vror.vi \d3, \d3, 32 - 16
-
- // c += d; b ^= c; b = rol(b, 12);
- vadd.vv \c0, \c0, \d0
- vadd.vv \c1, \c1, \d1
- vadd.vv \c2, \c2, \d2
- vadd.vv \c3, \c3, \d3
- vxor.vv \b0, \b0, \c0
- vxor.vv \b1, \b1, \c1
- vxor.vv \b2, \b2, \c2
- vxor.vv \b3, \b3, \c3
- vror.vi \b0, \b0, 32 - 12
- vror.vi \b1, \b1, 32 - 12
- vror.vi \b2, \b2, 32 - 12
- vror.vi \b3, \b3, 32 - 12
-
- // a += b; d ^= a; d = rol(d, 8);
- vadd.vv \a0, \a0, \b0
- vadd.vv \a1, \a1, \b1
- vadd.vv \a2, \a2, \b2
- vadd.vv \a3, \a3, \b3
- vxor.vv \d0, \d0, \a0
- vxor.vv \d1, \d1, \a1
- vxor.vv \d2, \d2, \a2
- vxor.vv \d3, \d3, \a3
- vror.vi \d0, \d0, 32 - 8
- vror.vi \d1, \d1, 32 - 8
- vror.vi \d2, \d2, 32 - 8
- vror.vi \d3, \d3, 32 - 8
-
- // c += d; b ^= c; b = rol(b, 7);
- vadd.vv \c0, \c0, \d0
- vadd.vv \c1, \c1, \d1
- vadd.vv \c2, \c2, \d2
- vadd.vv \c3, \c3, \d3
- vxor.vv \b0, \b0, \c0
- vxor.vv \b1, \b1, \c1
- vxor.vv \b2, \b2, \c2
- vxor.vv \b3, \b3, \c3
- vror.vi \b0, \b0, 32 - 7
- vror.vi \b1, \b1, 32 - 7
- vror.vi \b2, \b2, 32 - 7
- vror.vi \b3, \b3, 32 - 7
-.endm
-
-// void chacha_zvkb(u32 state[16], const u8 *in, u8 *out, size_t nblocks,
-// int nrounds);
-//
-// |nblocks| is the number of 64-byte blocks to process, and must be nonzero.
-//
-// |state| gives the ChaCha state matrix, including the 32-bit counter in
-// state[12] following the RFC 7539 convention; note that this differs from
-// the original ChaCha paper, which uses a 64-bit counter in state[12..13].
-// The updated 32-bit counter is written back to state[12] before returning.
-SYM_FUNC_START(chacha_zvkb)
- addi sp, sp, -96
- sd s0, 0(sp)
- sd s1, 8(sp)
- sd s2, 16(sp)
- sd s3, 24(sp)
- sd s4, 32(sp)
- sd s5, 40(sp)
- sd s6, 48(sp)
- sd s7, 56(sp)
- sd s8, 64(sp)
- sd s9, 72(sp)
- sd s10, 80(sp)
- sd s11, 88(sp)
-
- li STRIDE, 64
-
- // Set up the initial state matrix in scalar registers.
- lw CONSTS0, 0(STATEP)
- lw CONSTS1, 4(STATEP)
- lw CONSTS2, 8(STATEP)
- lw CONSTS3, 12(STATEP)
- lw KEY0, 16(STATEP)
- lw KEY1, 20(STATEP)
- lw KEY2, 24(STATEP)
- lw KEY3, 28(STATEP)
- lw KEY4, 32(STATEP)
- lw KEY5, 36(STATEP)
- lw KEY6, 40(STATEP)
- lw KEY7, 44(STATEP)
- lw COUNTER, 48(STATEP)
- lw NONCE0, 52(STATEP)
- lw NONCE1, 56(STATEP)
- lw NONCE2, 60(STATEP)
-
-.Lblock_loop:
- // Set vl to the number of blocks to process in this iteration.
- vsetvli VL, NBLOCKS, e32, m1, ta, ma
-
- // Set up the initial state matrix for the next VL blocks in v0-v15.
- // v{i} holds the i'th 32-bit word of the state matrix for all blocks.
- // Note that only the counter word, at index 12, differs across blocks.
- vmv.v.x v0, CONSTS0
- vmv.v.x v1, CONSTS1
- vmv.v.x v2, CONSTS2
- vmv.v.x v3, CONSTS3
- vmv.v.x v4, KEY0
- vmv.v.x v5, KEY1
- vmv.v.x v6, KEY2
- vmv.v.x v7, KEY3
- vmv.v.x v8, KEY4
- vmv.v.x v9, KEY5
- vmv.v.x v10, KEY6
- vmv.v.x v11, KEY7
- vid.v v12
- vadd.vx v12, v12, COUNTER
- vmv.v.x v13, NONCE0
- vmv.v.x v14, NONCE1
- vmv.v.x v15, NONCE2
-
- // Load the first half of the input data for each block into v16-v23.
- // v{16+i} holds the i'th 32-bit word for all blocks.
- vlsseg8e32.v v16, (INP), STRIDE
-
- mv ROUND_CTR, NROUNDS
-.Lnext_doubleround:
- addi ROUND_CTR, ROUND_CTR, -2
- // column round
- chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \
- v2, v6, v10, v14, v3, v7, v11, v15
- // diagonal round
- chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \
- v2, v7, v8, v13, v3, v4, v9, v14
- bnez ROUND_CTR, .Lnext_doubleround
-
- // Load the second half of the input data for each block into v24-v31.
- // v{24+i} holds the {8+i}'th 32-bit word for all blocks.
- addi TMP, INP, 32
- vlsseg8e32.v v24, (TMP), STRIDE
-
- // Finalize the first half of the keystream for each block.
- vadd.vx v0, v0, CONSTS0
- vadd.vx v1, v1, CONSTS1
- vadd.vx v2, v2, CONSTS2
- vadd.vx v3, v3, CONSTS3
- vadd.vx v4, v4, KEY0
- vadd.vx v5, v5, KEY1
- vadd.vx v6, v6, KEY2
- vadd.vx v7, v7, KEY3
-
- // Encrypt/decrypt the first half of the data for each block.
- vxor.vv v16, v16, v0
- vxor.vv v17, v17, v1
- vxor.vv v18, v18, v2
- vxor.vv v19, v19, v3
- vxor.vv v20, v20, v4
- vxor.vv v21, v21, v5
- vxor.vv v22, v22, v6
- vxor.vv v23, v23, v7
-
- // Store the first half of the output data for each block.
- vssseg8e32.v v16, (OUTP), STRIDE
-
- // Finalize the second half of the keystream for each block.
- vadd.vx v8, v8, KEY4
- vadd.vx v9, v9, KEY5
- vadd.vx v10, v10, KEY6
- vadd.vx v11, v11, KEY7
- vid.v v0
- vadd.vx v12, v12, COUNTER
- vadd.vx v13, v13, NONCE0
- vadd.vx v14, v14, NONCE1
- vadd.vx v15, v15, NONCE2
- vadd.vv v12, v12, v0
-
- // Encrypt/decrypt the second half of the data for each block.
- vxor.vv v24, v24, v8
- vxor.vv v25, v25, v9
- vxor.vv v26, v26, v10
- vxor.vv v27, v27, v11
-	vxor.vv	v28, v28, v12
-	vxor.vv	v29, v29, v13
- vxor.vv v30, v30, v14
- vxor.vv v31, v31, v15
-
- // Store the second half of the output data for each block.
- addi TMP, OUTP, 32
- vssseg8e32.v v24, (TMP), STRIDE
-
- // Update the counter, the remaining number of blocks, and the input and
- // output pointers according to the number of blocks processed (VL).
- add COUNTER, COUNTER, VL
- sub NBLOCKS, NBLOCKS, VL
- slli TMP, VL, 6
- add OUTP, OUTP, TMP
- add INP, INP, TMP
- bnez NBLOCKS, .Lblock_loop
-
- sw COUNTER, 48(STATEP)
- ld s0, 0(sp)
- ld s1, 8(sp)
- ld s2, 16(sp)
- ld s3, 24(sp)
- ld s4, 32(sp)
- ld s5, 40(sp)
- ld s6, 48(sp)
- ld s7, 56(sp)
- ld s8, 64(sp)
- ld s9, 72(sp)
- ld s10, 80(sp)
- ld s11, 88(sp)
- addi sp, sp, 96
- ret
-SYM_FUNC_END(chacha_zvkb)
# SPDX-License-Identifier: GPL-2.0-only
+obj-y += crypto/
lib-y += delay.o
lib-y += memcpy.o
lib-y += memset.o
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+
+config CRYPTO_CHACHA_RISCV64
+ tristate
+ depends on 64BIT && RISCV_ISA_V && TOOLCHAIN_HAS_VECTOR_CRYPTO
+ default CRYPTO_LIB_CHACHA_INTERNAL
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ select CRYPTO_LIB_CHACHA_GENERIC
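+
+# Defaulting to CRYPTO_LIB_CHACHA_INTERNAL means this accelerated
+# implementation is pulled in automatically whenever the ChaCha library
+# code itself is requested.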
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-$(CONFIG_CRYPTO_CHACHA_RISCV64) += chacha-riscv64.o
+chacha-riscv64-y := chacha-riscv64-glue.o chacha-riscv64-zvkb.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ChaCha stream cipher (RISC-V optimized)
+ *
+ * Copyright (C) 2023 SiFive, Inc.
+ * Author: Jerry Shih <jerry.shih@sifive.com>
+ */
+
+#include <asm/simd.h>
+#include <asm/vector.h>
+#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
+#include <linux/linkage.h>
+#include <linux/module.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_zvkb);
+
+asmlinkage void chacha_zvkb(u32 state[16], const u8 *in, u8 *out,
+ size_t nblocks, int nrounds);
+
+void hchacha_block_arch(const u32 *state, u32 *out, int nrounds)
+{
+ hchacha_block_generic(state, out, nrounds);
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ u8 block_buffer[CHACHA_BLOCK_SIZE];
+ unsigned int full_blocks = bytes / CHACHA_BLOCK_SIZE;
+ unsigned int tail_bytes = bytes % CHACHA_BLOCK_SIZE;
+
+ if (!static_branch_likely(&use_zvkb) || !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ kernel_vector_begin();
+ if (full_blocks) {
+ chacha_zvkb(state, src, dst, full_blocks, nrounds);
+ src += full_blocks * CHACHA_BLOCK_SIZE;
+ dst += full_blocks * CHACHA_BLOCK_SIZE;
+ }
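+	/*
+	 * Handle any partial trailing block via a bounce buffer, so that
+	 * chacha_zvkb() only ever operates on whole 64-byte blocks.
+	 */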
+ if (tail_bytes) {
+ memcpy(block_buffer, src, tail_bytes);
+ chacha_zvkb(state, block_buffer, block_buffer, 1, nrounds);
+ memcpy(dst, block_buffer, tail_bytes);
+ }
+ kernel_vector_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+bool chacha_is_arch_optimized(void)
+{
+ return static_key_enabled(&use_zvkb);
+}
+EXPORT_SYMBOL(chacha_is_arch_optimized);
+
+static int __init riscv64_chacha_mod_init(void)
+{
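+	/* chacha_zvkb() requires Zvkb and VLEN >= 128; see chacha-riscv64-zvkb.S. */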
+ if (riscv_isa_extension_available(NULL, ZVKB) &&
+ riscv_vector_vlen() >= 128)
+ static_branch_enable(&use_zvkb);
+ return 0;
+}
+arch_initcall(riscv64_chacha_mod_init);
+
+static void __exit riscv64_chacha_mod_exit(void)
+{
+}
+module_exit(riscv64_chacha_mod_exit);
+
+MODULE_DESCRIPTION("ChaCha stream cipher (RISC-V optimized)");
+MODULE_AUTHOR("Jerry Shih <jerry.shih@sifive.com>");
+MODULE_LICENSE("GPL");
--- /dev/null
+/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
+//
+// This file is dual-licensed, meaning that you can use it under your
+// choice of either of the following two licenses:
+//
+// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the Apache License 2.0 (the "License"). You can obtain
+// a copy in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+//
+// or
+//
+// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
+// Copyright 2024 Google LLC
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// The generated code of this file depends on the following RISC-V extensions:
+// - RV64I
+// - RISC-V Vector ('V') with VLEN >= 128
+// - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+
+#include <linux/linkage.h>
+
+.text
+.option arch, +zvkb
+
+#define STATEP a0
+#define INP a1
+#define OUTP a2
+#define NBLOCKS a3
+#define NROUNDS a4
+
+#define CONSTS0 a5
+#define CONSTS1 a6
+#define CONSTS2 a7
+#define CONSTS3 t0
+#define TMP t1
+#define VL t2
+#define STRIDE t3
+#define ROUND_CTR t4
+#define KEY0 s0
+#define KEY1 s1
+#define KEY2 s2
+#define KEY3 s3
+#define KEY4 s4
+#define KEY5 s5
+#define KEY6 s6
+#define KEY7 s7
+#define COUNTER s8
+#define NONCE0 s9
+#define NONCE1 s10
+#define NONCE2 s11
+
+.macro chacha_round a0, b0, c0, d0, a1, b1, c1, d1, \
+ a2, b2, c2, d2, a3, b3, c3, d3
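+	// One full ChaCha round: four independent quarter-rounds, vectorized
+	// across VL blocks.  Each rol(x, n) is a right rotate by 32 - n,
+	// using Zvkb's vror.vi.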
+ // a += b; d ^= a; d = rol(d, 16);
+ vadd.vv \a0, \a0, \b0
+ vadd.vv \a1, \a1, \b1
+ vadd.vv \a2, \a2, \b2
+ vadd.vv \a3, \a3, \b3
+ vxor.vv \d0, \d0, \a0
+ vxor.vv \d1, \d1, \a1
+ vxor.vv \d2, \d2, \a2
+ vxor.vv \d3, \d3, \a3
+ vror.vi \d0, \d0, 32 - 16
+ vror.vi \d1, \d1, 32 - 16
+ vror.vi \d2, \d2, 32 - 16
+ vror.vi \d3, \d3, 32 - 16
+
+ // c += d; b ^= c; b = rol(b, 12);
+ vadd.vv \c0, \c0, \d0
+ vadd.vv \c1, \c1, \d1
+ vadd.vv \c2, \c2, \d2
+ vadd.vv \c3, \c3, \d3
+ vxor.vv \b0, \b0, \c0
+ vxor.vv \b1, \b1, \c1
+ vxor.vv \b2, \b2, \c2
+ vxor.vv \b3, \b3, \c3
+ vror.vi \b0, \b0, 32 - 12
+ vror.vi \b1, \b1, 32 - 12
+ vror.vi \b2, \b2, 32 - 12
+ vror.vi \b3, \b3, 32 - 12
+
+ // a += b; d ^= a; d = rol(d, 8);
+ vadd.vv \a0, \a0, \b0
+ vadd.vv \a1, \a1, \b1
+ vadd.vv \a2, \a2, \b2
+ vadd.vv \a3, \a3, \b3
+ vxor.vv \d0, \d0, \a0
+ vxor.vv \d1, \d1, \a1
+ vxor.vv \d2, \d2, \a2
+ vxor.vv \d3, \d3, \a3
+ vror.vi \d0, \d0, 32 - 8
+ vror.vi \d1, \d1, 32 - 8
+ vror.vi \d2, \d2, 32 - 8
+ vror.vi \d3, \d3, 32 - 8
+
+ // c += d; b ^= c; b = rol(b, 7);
+ vadd.vv \c0, \c0, \d0
+ vadd.vv \c1, \c1, \d1
+ vadd.vv \c2, \c2, \d2
+ vadd.vv \c3, \c3, \d3
+ vxor.vv \b0, \b0, \c0
+ vxor.vv \b1, \b1, \c1
+ vxor.vv \b2, \b2, \c2
+ vxor.vv \b3, \b3, \c3
+ vror.vi \b0, \b0, 32 - 7
+ vror.vi \b1, \b1, 32 - 7
+ vror.vi \b2, \b2, 32 - 7
+ vror.vi \b3, \b3, 32 - 7
+.endm
+
+// void chacha_zvkb(u32 state[16], const u8 *in, u8 *out, size_t nblocks,
+// int nrounds);
+//
+// |nblocks| is the number of 64-byte blocks to process, and must be nonzero.
+//
+// |state| gives the ChaCha state matrix, including the 32-bit counter in
+// state[12] following the RFC 7539 convention; note that this differs from
+// the original ChaCha paper, which uses a 64-bit counter in state[12..13].
+// The updated 32-bit counter is written back to state[12] before returning.
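+//
+// Example (illustrative only): process two contiguous 64-byte blocks with
+// the standard 20 rounds, advancing state[12] by 2:
+//
+//	chacha_zvkb(state, src, dst, 2, 20);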
+SYM_FUNC_START(chacha_zvkb)
+ addi sp, sp, -96
+ sd s0, 0(sp)
+ sd s1, 8(sp)
+ sd s2, 16(sp)
+ sd s3, 24(sp)
+ sd s4, 32(sp)
+ sd s5, 40(sp)
+ sd s6, 48(sp)
+ sd s7, 56(sp)
+ sd s8, 64(sp)
+ sd s9, 72(sp)
+ sd s10, 80(sp)
+ sd s11, 88(sp)
+
+ li STRIDE, 64
+
+ // Set up the initial state matrix in scalar registers.
+ lw CONSTS0, 0(STATEP)
+ lw CONSTS1, 4(STATEP)
+ lw CONSTS2, 8(STATEP)
+ lw CONSTS3, 12(STATEP)
+ lw KEY0, 16(STATEP)
+ lw KEY1, 20(STATEP)
+ lw KEY2, 24(STATEP)
+ lw KEY3, 28(STATEP)
+ lw KEY4, 32(STATEP)
+ lw KEY5, 36(STATEP)
+ lw KEY6, 40(STATEP)
+ lw KEY7, 44(STATEP)
+ lw COUNTER, 48(STATEP)
+ lw NONCE0, 52(STATEP)
+ lw NONCE1, 56(STATEP)
+ lw NONCE2, 60(STATEP)
+
+.Lblock_loop:
+ // Set vl to the number of blocks to process in this iteration.
+ vsetvli VL, NBLOCKS, e32, m1, ta, ma
+
+ // Set up the initial state matrix for the next VL blocks in v0-v15.
+ // v{i} holds the i'th 32-bit word of the state matrix for all blocks.
+ // Note that only the counter word, at index 12, differs across blocks.
+ vmv.v.x v0, CONSTS0
+ vmv.v.x v1, CONSTS1
+ vmv.v.x v2, CONSTS2
+ vmv.v.x v3, CONSTS3
+ vmv.v.x v4, KEY0
+ vmv.v.x v5, KEY1
+ vmv.v.x v6, KEY2
+ vmv.v.x v7, KEY3
+ vmv.v.x v8, KEY4
+ vmv.v.x v9, KEY5
+ vmv.v.x v10, KEY6
+ vmv.v.x v11, KEY7
+ vid.v v12
+ vadd.vx v12, v12, COUNTER
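+	// vid.v filled v12 with 0, 1, ..., VL-1, so lane i now holds
+	// COUNTER + i: each block gets its own counter value.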
+ vmv.v.x v13, NONCE0
+ vmv.v.x v14, NONCE1
+ vmv.v.x v15, NONCE2
+
+ // Load the first half of the input data for each block into v16-v23.
+ // v{16+i} holds the i'th 32-bit word for all blocks.
+ vlsseg8e32.v v16, (INP), STRIDE
+
+ mv ROUND_CTR, NROUNDS
+.Lnext_doubleround:
+ addi ROUND_CTR, ROUND_CTR, -2
+ // column round
+ chacha_round v0, v4, v8, v12, v1, v5, v9, v13, \
+ v2, v6, v10, v14, v3, v7, v11, v15
+ // diagonal round
+ chacha_round v0, v5, v10, v15, v1, v6, v11, v12, \
+ v2, v7, v8, v13, v3, v4, v9, v14
+ bnez ROUND_CTR, .Lnext_doubleround
+
+ // Load the second half of the input data for each block into v24-v31.
+ // v{24+i} holds the {8+i}'th 32-bit word for all blocks.
+ addi TMP, INP, 32
+ vlsseg8e32.v v24, (TMP), STRIDE
+
+ // Finalize the first half of the keystream for each block.
+ vadd.vx v0, v0, CONSTS0
+ vadd.vx v1, v1, CONSTS1
+ vadd.vx v2, v2, CONSTS2
+ vadd.vx v3, v3, CONSTS3
+ vadd.vx v4, v4, KEY0
+ vadd.vx v5, v5, KEY1
+ vadd.vx v6, v6, KEY2
+ vadd.vx v7, v7, KEY3
+
+ // Encrypt/decrypt the first half of the data for each block.
+ vxor.vv v16, v16, v0
+ vxor.vv v17, v17, v1
+ vxor.vv v18, v18, v2
+ vxor.vv v19, v19, v3
+ vxor.vv v20, v20, v4
+ vxor.vv v21, v21, v5
+ vxor.vv v22, v22, v6
+ vxor.vv v23, v23, v7
+
+ // Store the first half of the output data for each block.
+ vssseg8e32.v v16, (OUTP), STRIDE
+
+ // Finalize the second half of the keystream for each block.
+ vadd.vx v8, v8, KEY4
+ vadd.vx v9, v9, KEY5
+ vadd.vx v10, v10, KEY6
+ vadd.vx v11, v11, KEY7
+ vid.v v0
+ vadd.vx v12, v12, COUNTER
+ vadd.vx v13, v13, NONCE0
+ vadd.vx v14, v14, NONCE1
+ vadd.vx v15, v15, NONCE2
+ vadd.vv v12, v12, v0
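+	// The counter word is the only one that differs across blocks, so its
+	// feed-forward above rebuilds COUNTER + lane_index using vid.v.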
+
+ // Encrypt/decrypt the second half of the data for each block.
+ vxor.vv v24, v24, v8
+ vxor.vv v25, v25, v9
+ vxor.vv v26, v26, v10
+ vxor.vv v27, v27, v11
+	vxor.vv	v28, v28, v12
+	vxor.vv	v29, v29, v13
+ vxor.vv v30, v30, v14
+ vxor.vv v31, v31, v15
+
+ // Store the second half of the output data for each block.
+ addi TMP, OUTP, 32
+ vssseg8e32.v v24, (TMP), STRIDE
+
+ // Update the counter, the remaining number of blocks, and the input and
+ // output pointers according to the number of blocks processed (VL).
+ add COUNTER, COUNTER, VL
+ sub NBLOCKS, NBLOCKS, VL
+ slli TMP, VL, 6
+ add OUTP, OUTP, TMP
+ add INP, INP, TMP
+ bnez NBLOCKS, .Lblock_loop
+
+ sw COUNTER, 48(STATEP)
+ ld s0, 0(sp)
+ ld s1, 8(sp)
+ ld s2, 16(sp)
+ ld s3, 24(sp)
+ ld s4, 32(sp)
+ ld s5, 40(sp)
+ ld s6, 48(sp)
+ ld s7, 56(sp)
+ ld s8, 64(sp)
+ ld s9, 72(sp)
+ ld s10, 80(sp)
+ ld s11, 88(sp)
+ addi sp, sp, 96
+ ret
+SYM_FUNC_END(chacha_zvkb)
if PPC
source "arch/powerpc/lib/crypto/Kconfig"
endif
+if RISCV
+source "arch/riscv/lib/crypto/Kconfig"
+endif
endif
endmenu