crypto: arm64/sha1-ce - move SHA-1 ARMv8 implementation to base layer
author Ard Biesheuvel <ard.biesheuvel@linaro.org>
Thu, 9 Apr 2015 10:55:44 +0000 (12:55 +0200)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 10 Apr 2015 13:39:46 +0000 (21:39 +0800)
This removes all the boilerplate from the existing implementation,
and replaces it with calls into the base layer.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm64/crypto/sha1-ce-core.S
arch/arm64/crypto/sha1-ce-glue.c

index 09d57d98609cc8ff9c62a5e729060a4336ff095f..033aae6d732a14464d0167e9aaee5e4e9c1a427b 100644 (file)
@@ -66,8 +66,8 @@
        .word           0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
 
        /*
-        * void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-        *                        u8 *head, long bytes)
+        * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+        *                        int blocks)
         */
 ENTRY(sha1_ce_transform)
        /* load round constants */
@@ -78,25 +78,22 @@ ENTRY(sha1_ce_transform)
        ld1r            {k3.4s}, [x6]
 
        /* load state */
-       ldr             dga, [x2]
-       ldr             dgb, [x2, #16]
+       ldr             dga, [x0]
+       ldr             dgb, [x0, #16]
 
-       /* load partial state (if supplied) */
-       cbz             x3, 0f
-       ld1             {v8.4s-v11.4s}, [x3]
-       b               1f
+       /* load sha1_ce_state::finalize */
+       ldr             w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
 
        /* load input */
 0:     ld1             {v8.4s-v11.4s}, [x1], #64
-       sub             w0, w0, #1
+       sub             w2, w2, #1
 
-1:
 CPU_LE(        rev32           v8.16b, v8.16b          )
 CPU_LE(        rev32           v9.16b, v9.16b          )
 CPU_LE(        rev32           v10.16b, v10.16b        )
 CPU_LE(        rev32           v11.16b, v11.16b        )
 
-2:     add             t0.4s, v8.4s, k0.4s
+1:     add             t0.4s, v8.4s, k0.4s
        mov             dg0v.16b, dgav.16b
 
        add_update      c, ev, k0,  8,  9, 10, 11, dgb
@@ -127,15 +124,15 @@ CPU_LE(   rev32           v11.16b, v11.16b        )
        add             dgbv.2s, dgbv.2s, dg1v.2s
        add             dgav.4s, dgav.4s, dg0v.4s
 
-       cbnz            w0, 0b
+       cbnz            w2, 0b
 
        /*
         * Final block: add padding and total bit count.
-        * Skip if we have no total byte count in x4. In that case, the input
-        * size was not a round multiple of the block size, and the padding is
-        * handled by the C code.
+        * Skip if the input size was not a round multiple of the block size,
+        * the padding is handled by the C code in that case.
         */
        cbz             x4, 3f
+       ldr             x4, [x0, #:lo12:sha1_ce_offsetof_count]
        movi            v9.2d, #0
        mov             x8, #0x80000000
        movi            v10.2d, #0
@@ -144,10 +141,10 @@ CPU_LE(   rev32           v11.16b, v11.16b        )
        mov             x4, #0
        mov             v11.d[0], xzr
        mov             v11.d[1], x7
-       b               2b
+       b               1b
 
        /* store new state */
-3:     str             dga, [x2]
-       str             dgb, [x2, #16]
+3:     str             dga, [x0]
+       str             dgb, [x0, #16]
        ret
 ENDPROC(sha1_ce_transform)
index 6fe83f37a7500f789e7cb27c06950e926bc0eebd..114e7cc5de8c09b4eb75f6b11294e9c689d1ebbc 100644 (file)
 #include <asm/unaligned.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
+#include <crypto/sha1_base.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
 
+#define ASM_EXPORT(sym, val) \
+       asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
+
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
-asmlinkage void sha1_ce_transform(int blocks, u8 const *src, u32 *state,
-                                 u8 *head, long bytes);
+struct sha1_ce_state {
+       struct sha1_state       sst;
+       u32                     finalize;
+};
 
-static int sha1_init(struct shash_desc *desc)
-{
-       struct sha1_state *sctx = shash_desc_ctx(desc);
+asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
+                                 int blocks);
 
-       *sctx = (struct sha1_state){
-               .state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
-       };
-       return 0;
-}
-
-static int sha1_update(struct shash_desc *desc, const u8 *data,
-                      unsigned int len)
+static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
+                         unsigned int len)
 {
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-       unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
-
-       sctx->count += len;
-
-       if ((partial + len) >= SHA1_BLOCK_SIZE) {
-               int blocks;
-
-               if (partial) {
-                       int p = SHA1_BLOCK_SIZE - partial;
+       struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 
-                       memcpy(sctx->buffer + partial, data, p);
-                       data += p;
-                       len -= p;
-               }
-
-               blocks = len / SHA1_BLOCK_SIZE;
-               len %= SHA1_BLOCK_SIZE;
-
-               kernel_neon_begin_partial(16);
-               sha1_ce_transform(blocks, data, sctx->state,
-                                 partial ? sctx->buffer : NULL, 0);
-               kernel_neon_end();
+       sctx->finalize = 0;
+       kernel_neon_begin_partial(16);
+       sha1_base_do_update(desc, data, len,
+                           (sha1_block_fn *)sha1_ce_transform);
+       kernel_neon_end();
 
-               data += blocks * SHA1_BLOCK_SIZE;
-               partial = 0;
-       }
-       if (len)
-               memcpy(sctx->buffer + partial, data, len);
        return 0;
 }
 
-static int sha1_final(struct shash_desc *desc, u8 *out)
+static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
+                        unsigned int len, u8 *out)
 {
-       static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
+       struct sha1_ce_state *sctx = shash_desc_ctx(desc);
+       bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
 
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-       __be64 bits = cpu_to_be64(sctx->count << 3);
-       __be32 *dst = (__be32 *)out;
-       int i;
-
-       u32 padlen = SHA1_BLOCK_SIZE
-                    - ((sctx->count + sizeof(bits)) % SHA1_BLOCK_SIZE);
-
-       sha1_update(desc, padding, padlen);
-       sha1_update(desc, (const u8 *)&bits, sizeof(bits));
-
-       for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-               put_unaligned_be32(sctx->state[i], dst++);
-
-       *sctx = (struct sha1_state){};
-       return 0;
-}
-
-static int sha1_finup(struct shash_desc *desc, const u8 *data,
-                     unsigned int len, u8 *out)
-{
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-       __be32 *dst = (__be32 *)out;
-       int blocks;
-       int i;
-
-       if (sctx->count || !len || (len % SHA1_BLOCK_SIZE)) {
-               sha1_update(desc, data, len);
-               return sha1_final(desc, out);
-       }
+       ASM_EXPORT(sha1_ce_offsetof_count,
+                  offsetof(struct sha1_ce_state, sst.count));
+       ASM_EXPORT(sha1_ce_offsetof_finalize,
+                  offsetof(struct sha1_ce_state, finalize));
 
        /*
-        * Use a fast path if the input is a multiple of 64 bytes. In
-        * this case, there is no need to copy data around, and we can
-        * perform the entire digest calculation in a single invocation
-        * of sha1_ce_transform()
+        * Allow the asm code to perform the finalization if there is no
+        * partial data and the input is a round multiple of the block size.
         */
-       blocks = len / SHA1_BLOCK_SIZE;
+       sctx->finalize = finalize;
 
        kernel_neon_begin_partial(16);
-       sha1_ce_transform(blocks, data, sctx->state, NULL, len);
+       sha1_base_do_update(desc, data, len,
+                           (sha1_block_fn *)sha1_ce_transform);
+       if (!finalize)
+               sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
        kernel_neon_end();
-
-       for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(__be32); i++)
-               put_unaligned_be32(sctx->state[i], dst++);
-
-       *sctx = (struct sha1_state){};
-       return 0;
+       return sha1_base_finish(desc, out);
 }
 
-static int sha1_export(struct shash_desc *desc, void *out)
+static int sha1_ce_final(struct shash_desc *desc, u8 *out)
 {
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-       struct sha1_state *dst = out;
-
-       *dst = *sctx;
-       return 0;
-}
-
-static int sha1_import(struct shash_desc *desc, const void *in)
-{
-       struct sha1_state *sctx = shash_desc_ctx(desc);
-       struct sha1_state const *src = in;
-
-       *sctx = *src;
-       return 0;
+       kernel_neon_begin_partial(16);
+       sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
+       kernel_neon_end();
+       return sha1_base_finish(desc, out);
 }
 
 static struct shash_alg alg = {
-       .init                   = sha1_init,
-       .update                 = sha1_update,
-       .final                  = sha1_final,
-       .finup                  = sha1_finup,
-       .export                 = sha1_export,
-       .import                 = sha1_import,
-       .descsize               = sizeof(struct sha1_state),
+       .init                   = sha1_base_init,
+       .update                 = sha1_ce_update,
+       .final                  = sha1_ce_final,
+       .finup                  = sha1_ce_finup,
+       .descsize               = sizeof(struct sha1_ce_state),
        .digestsize             = SHA1_DIGEST_SIZE,
-       .statesize              = sizeof(struct sha1_state),
        .base                   = {
                .cra_name               = "sha1",
                .cra_driver_name        = "sha1-ce",