crypto: aesni - Add scatter/gather avx stubs, and use them in C
author Dave Watson <davejwatson@fb.com>
Mon, 10 Dec 2018 19:59:59 +0000 (19:59 +0000)
committer Herbert Xu <herbert@gondor.apana.org.au>
Sun, 23 Dec 2018 03:52:43 +0000 (11:52 +0800)
Add the appropriate scatter/gather stubs to the AVX asm.
In the C code, we can now always use crypt_by_sg, since both the
SSE and AVX asm code now support scatter/gather.

Introduce a new struct, aesni_gcm_tfm_s, and a pointer to it,
aesni_gcm_tfm, that is initialized on startup to point to either the
SSE, AVX, or AVX2 versions of the four necessary encryption/decryption
routines (init, enc_update, dec_update, finalize).

The AVX_GEN2_OPTSIZE and AVX_GEN4_OPTSIZE thresholds are still checked
at the start of crypt_by_sg.  The total size of the data is checked,
since the additional overhead is in the init function, which
calculates additional HashKeys.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/x86/crypto/aesni-intel_avx-x86_64.S
arch/x86/crypto/aesni-intel_glue.c

index af45fc57db903472da4e4f6541bcd29434a8c598..91c039ab56999d914f15d1796b70ddf94e2d6fe5 100644 (file)
@@ -518,14 +518,13 @@ _less_than_8_bytes_left\@:
         #############################
 
 _multiple_of_16_bytes\@:
-        GCM_COMPLETE \GHASH_MUL \REP
 .endm
 
 
 # GCM_COMPLETE Finishes update of tag of last partial block
 # Output: Authorization Tag (AUTH_TAG)
 # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
-.macro GCM_COMPLETE GHASH_MUL REP
+.macro GCM_COMPLETE GHASH_MUL REP AUTH_TAG AUTH_TAG_LEN
         vmovdqu AadHash(arg2), %xmm14
         vmovdqu HashKey(arg2), %xmm13
 
@@ -560,8 +559,8 @@ _partial_done\@:
 
 
 _return_T\@:
-        mov     arg9, %r10              # r10 = authTag
-        mov     arg10, %r11              # r11 = auth_tag_len
+        mov     \AUTH_TAG, %r10              # r10 = authTag
+        mov     \AUTH_TAG_LEN, %r11              # r11 = auth_tag_len
 
         cmp     $16, %r11
         je      _T_16\@
@@ -680,14 +679,14 @@ _get_AAD_done\@:
 
         mov %r11, PBlockLen(arg2) # ctx_data.partial_block_length = 0
         mov %r11, PBlockEncKey(arg2) # ctx_data.partial_block_enc_key = 0
-        mov arg4, %rax
+        mov arg3, %rax
         movdqu (%rax), %xmm0
         movdqu %xmm0, OrigIV(arg2) # ctx_data.orig_IV = iv
 
         vpshufb SHUF_MASK(%rip), %xmm0, %xmm0
         movdqu %xmm0, CurCount(arg2) # ctx_data.current_counter = iv
 
-        vmovdqu  (arg3), %xmm6              # xmm6 = HashKey
+        vmovdqu  (arg4), %xmm6              # xmm6 = HashKey
 
         vpshufb  SHUF_MASK(%rip), %xmm6, %xmm6
         ###############  PRECOMPUTATION of HashKey<<1 mod poly from the HashKey
@@ -1776,88 +1775,100 @@ _initial_blocks_done\@:
 #        const   u8 *aad, /* Additional Authentication Data (AAD)*/
 #        u64     aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
 #############################################################
-ENTRY(aesni_gcm_precomp_avx_gen2)
+ENTRY(aesni_gcm_init_avx_gen2)
         FUNC_SAVE
         INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_precomp_avx_gen2)
+ENDPROC(aesni_gcm_init_avx_gen2)
 
 ###############################################################################
-#void   aesni_gcm_enc_avx_gen2(
+#void   aesni_gcm_enc_update_avx_gen2(
 #        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
 #        gcm_context_data *data,
 #        u8      *out, /* Ciphertext output. Encrypt in-place is allowed.  */
 #        const   u8 *in, /* Plaintext input */
-#        u64     plaintext_len, /* Length of data in Bytes for encryption. */
-#        u8      *iv, /* Pre-counter block j0: 4 byte salt
-#                      (from Security Association) concatenated with 8 byte
-#                      Initialisation Vector (from IPSec ESP Payload)
-#                      concatenated with 0x00000001. 16-byte aligned pointer. */
-#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
-#        u64     aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
-#        u8      *auth_tag, /* Authenticated Tag output. */
-#        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
-#                              Valid values are 16 (most likely), 12 or 8. */
+#        u64     plaintext_len) /* Length of data in Bytes for encryption. */
 ###############################################################################
-ENTRY(aesni_gcm_enc_avx_gen2)
+ENTRY(aesni_gcm_enc_update_avx_gen2)
         FUNC_SAVE
         mov     keysize, %eax
         cmp     $32, %eax
-        je      key_256_enc
+        je      key_256_enc_update
         cmp     $16, %eax
-        je      key_128_enc
+        je      key_128_enc_update
         # must be 192
         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 11
         FUNC_RESTORE
         ret
-key_128_enc:
+key_128_enc_update:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 9
         FUNC_RESTORE
         ret
-key_256_enc:
+key_256_enc_update:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_enc_avx_gen2)
+ENDPROC(aesni_gcm_enc_update_avx_gen2)
 
 ###############################################################################
-#void   aesni_gcm_dec_avx_gen2(
+#void   aesni_gcm_dec_update_avx_gen2(
 #        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
 #        gcm_context_data *data,
 #        u8      *out, /* Plaintext output. Decrypt in-place is allowed.  */
 #        const   u8 *in, /* Ciphertext input */
-#        u64     plaintext_len, /* Length of data in Bytes for encryption. */
-#        u8      *iv, /* Pre-counter block j0: 4 byte salt
-#                      (from Security Association) concatenated with 8 byte
-#                      Initialisation Vector (from IPSec ESP Payload)
-#                      concatenated with 0x00000001. 16-byte aligned pointer. */
-#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
-#        u64     aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
-#        u8      *auth_tag, /* Authenticated Tag output. */
-#        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
-#                              Valid values are 16 (most likely), 12 or 8. */
+#        u64     plaintext_len) /* Length of data in Bytes for encryption. */
 ###############################################################################
-ENTRY(aesni_gcm_dec_avx_gen2)
+ENTRY(aesni_gcm_dec_update_avx_gen2)
         FUNC_SAVE
         mov     keysize,%eax
         cmp     $32, %eax
-        je      key_256_dec
+        je      key_256_dec_update
         cmp     $16, %eax
-        je      key_128_dec
+        je      key_128_dec_update
         # must be 192
         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 11
         FUNC_RESTORE
         ret
-key_128_dec:
+key_128_dec_update:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 9
         FUNC_RESTORE
         ret
-key_256_dec:
+key_256_dec_update:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_dec_avx_gen2)
+ENDPROC(aesni_gcm_dec_update_avx_gen2)
+
+###############################################################################
+#void   aesni_gcm_finalize_avx_gen2(
+#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
+#        gcm_context_data *data,
+#        u8      *auth_tag, /* Authenticated Tag output. */
+#        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
+#                              Valid values are 16 (most likely), 12 or 8. */
+###############################################################################
+ENTRY(aesni_gcm_finalize_avx_gen2)
+        FUNC_SAVE
+        mov    keysize,%eax
+        cmp     $32, %eax
+        je      key_256_finalize
+        cmp     $16, %eax
+        je      key_128_finalize
+        # must be 192
+        GCM_COMPLETE GHASH_MUL_AVX, 11, arg3, arg4
+        FUNC_RESTORE
+        ret
+key_128_finalize:
+        GCM_COMPLETE GHASH_MUL_AVX, 9, arg3, arg4
+        FUNC_RESTORE
+        ret
+key_256_finalize:
+        GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
+        FUNC_RESTORE
+        ret
+ENDPROC(aesni_gcm_finalize_avx_gen2)
+
 #endif /* CONFIG_AS_AVX */
 
 #ifdef CONFIG_AS_AVX2
@@ -2724,24 +2735,23 @@ _initial_blocks_done\@:
 
 
 #############################################################
-#void   aesni_gcm_precomp_avx_gen4
+#void   aesni_gcm_init_avx_gen4
 #        (gcm_data     *my_ctx_data,
 #         gcm_context_data *data,
-#        u8     *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
 #        u8      *iv, /* Pre-counter block j0: 4 byte salt
 #                      (from Security Association) concatenated with 8 byte
 #                      Initialisation Vector (from IPSec ESP Payload)
 #                      concatenated with 0x00000001. 16-byte aligned pointer. */
+#        u8     *hash_subkey# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */
 #        const   u8 *aad, /* Additional Authentication Data (AAD)*/
 #        u64     aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
 #############################################################
-ENTRY(aesni_gcm_precomp_avx_gen4)
+ENTRY(aesni_gcm_init_avx_gen4)
         FUNC_SAVE
         INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_precomp_avx_gen4)
-
+ENDPROC(aesni_gcm_init_avx_gen4)
 
 ###############################################################################
 #void   aesni_gcm_enc_avx_gen4(
@@ -2749,74 +2759,85 @@ ENDPROC(aesni_gcm_precomp_avx_gen4)
 #        gcm_context_data *data,
 #        u8      *out, /* Ciphertext output. Encrypt in-place is allowed.  */
 #        const   u8 *in, /* Plaintext input */
-#        u64     plaintext_len, /* Length of data in Bytes for encryption. */
-#        u8      *iv, /* Pre-counter block j0: 4 byte salt
-#                      (from Security Association) concatenated with 8 byte
-#                       Initialisation Vector (from IPSec ESP Payload)
-#                       concatenated with 0x00000001. 16-byte aligned pointer. */
-#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
-#        u64     aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
-#        u8      *auth_tag, /* Authenticated Tag output. */
-#        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
-#                              Valid values are 16 (most likely), 12 or 8. */
+#        u64     plaintext_len) /* Length of data in Bytes for encryption. */
 ###############################################################################
-ENTRY(aesni_gcm_enc_avx_gen4)
+ENTRY(aesni_gcm_enc_update_avx_gen4)
         FUNC_SAVE
         mov     keysize,%eax
         cmp     $32, %eax
-        je      key_256_enc4
+        je      key_256_enc_update4
         cmp     $16, %eax
-        je      key_128_enc4
+        je      key_128_enc_update4
         # must be 192
         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 11
         FUNC_RESTORE
        ret
-key_128_enc4:
+key_128_enc_update4:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 9
         FUNC_RESTORE
        ret
-key_256_enc4:
+key_256_enc_update4:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
         FUNC_RESTORE
        ret
-ENDPROC(aesni_gcm_enc_avx_gen4)
+ENDPROC(aesni_gcm_enc_update_avx_gen4)
 
 ###############################################################################
-#void   aesni_gcm_dec_avx_gen4(
+#void   aesni_gcm_dec_update_avx_gen4(
 #        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
 #        gcm_context_data *data,
 #        u8      *out, /* Plaintext output. Decrypt in-place is allowed.  */
 #        const   u8 *in, /* Ciphertext input */
-#        u64     plaintext_len, /* Length of data in Bytes for encryption. */
-#        u8      *iv, /* Pre-counter block j0: 4 byte salt
-#                      (from Security Association) concatenated with 8 byte
-#                      Initialisation Vector (from IPSec ESP Payload)
-#                      concatenated with 0x00000001. 16-byte aligned pointer. */
-#        const   u8 *aad, /* Additional Authentication Data (AAD)*/
-#        u64     aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
-#        u8      *auth_tag, /* Authenticated Tag output. */
-#        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
-#                              Valid values are 16 (most likely), 12 or 8. */
+#        u64     plaintext_len) /* Length of data in Bytes for encryption. */
 ###############################################################################
-ENTRY(aesni_gcm_dec_avx_gen4)
+ENTRY(aesni_gcm_dec_update_avx_gen4)
         FUNC_SAVE
         mov     keysize,%eax
         cmp     $32, %eax
-        je      key_256_dec4
+        je      key_256_dec_update4
         cmp     $16, %eax
-        je      key_128_dec4
+        je      key_128_dec_update4
         # must be 192
         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 11
         FUNC_RESTORE
         ret
-key_128_dec4:
+key_128_dec_update4:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 9
         FUNC_RESTORE
         ret
-key_256_dec4:
+key_256_dec_update4:
         GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
         FUNC_RESTORE
         ret
-ENDPROC(aesni_gcm_dec_avx_gen4)
+ENDPROC(aesni_gcm_dec_update_avx_gen4)
+
+###############################################################################
+#void   aesni_gcm_finalize_avx_gen4(
+#        gcm_data        *my_ctx_data,     /* aligned to 16 Bytes */
+#        gcm_context_data *data,
+#        u8      *auth_tag, /* Authenticated Tag output. */
+#        u64     auth_tag_len)# /* Authenticated Tag Length in bytes.
+#                              Valid values are 16 (most likely), 12 or 8. */
+###############################################################################
+ENTRY(aesni_gcm_finalize_avx_gen4)
+        FUNC_SAVE
+        mov    keysize,%eax
+        cmp     $32, %eax
+        je      key_256_finalize4
+        cmp     $16, %eax
+        je      key_128_finalize4
+        # must be 192
+        GCM_COMPLETE GHASH_MUL_AVX2, 11, arg3, arg4
+        FUNC_RESTORE
+        ret
+key_128_finalize4:
+        GCM_COMPLETE GHASH_MUL_AVX2, 9, arg3, arg4
+        FUNC_RESTORE
+        ret
+key_256_finalize4:
+        GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
+        FUNC_RESTORE
+        ret
+ENDPROC(aesni_gcm_finalize_avx_gen4)
 
 #endif /* CONFIG_AS_AVX2 */
index 2648842f1c3f44ae6c1832440338f327f2e88218..1321700d6647f8be1f7894e00405c8ad40e51c38 100644 (file)
@@ -175,6 +175,32 @@ asmlinkage void aesni_gcm_finalize(void *ctx,
                                   struct gcm_context_data *gdata,
                                   u8 *auth_tag, unsigned long auth_tag_len);
 
+static struct aesni_gcm_tfm_s {
+void (*init)(void *ctx,
+                               struct gcm_context_data *gdata,
+                               u8 *iv,
+                               u8 *hash_subkey, const u8 *aad,
+                               unsigned long aad_len);
+void (*enc_update)(void *ctx,
+                                       struct gcm_context_data *gdata, u8 *out,
+                                       const u8 *in,
+                                       unsigned long plaintext_len);
+void (*dec_update)(void *ctx,
+                                       struct gcm_context_data *gdata, u8 *out,
+                                       const u8 *in,
+                                       unsigned long ciphertext_len);
+void (*finalize)(void *ctx,
+                               struct gcm_context_data *gdata,
+                               u8 *auth_tag, unsigned long auth_tag_len);
+} *aesni_gcm_tfm;
+
+struct aesni_gcm_tfm_s aesni_gcm_tfm_sse = {
+       .init = &aesni_gcm_init,
+       .enc_update = &aesni_gcm_enc_update,
+       .dec_update = &aesni_gcm_dec_update,
+       .finalize = &aesni_gcm_finalize,
+};
+
 #ifdef CONFIG_AS_AVX
 asmlinkage void aes_ctr_enc_128_avx_by8(const u8 *in, u8 *iv,
                void *keys, u8 *out, unsigned int num_bytes);
@@ -183,16 +209,27 @@ asmlinkage void aes_ctr_enc_192_avx_by8(const u8 *in, u8 *iv,
 asmlinkage void aes_ctr_enc_256_avx_by8(const u8 *in, u8 *iv,
                void *keys, u8 *out, unsigned int num_bytes);
 /*
- * asmlinkage void aesni_gcm_precomp_avx_gen2()
+ * asmlinkage void aesni_gcm_init_avx_gen2()
  * gcm_data *my_ctx_data, context data
  * u8 *hash_subkey,  the Hash sub key input. Data starts on a 16-byte boundary.
  */
-asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data,
-                                          struct gcm_context_data *gdata,
-                                          u8 *hash_subkey,
-                                          u8 *iv,
-                                          const u8 *aad,
-                                          unsigned long aad_len);
+asmlinkage void aesni_gcm_init_avx_gen2(void *my_ctx_data,
+                                       struct gcm_context_data *gdata,
+                                       u8 *iv,
+                                       u8 *hash_subkey,
+                                       const u8 *aad,
+                                       unsigned long aad_len);
+
+asmlinkage void aesni_gcm_enc_update_avx_gen2(void *ctx,
+                                    struct gcm_context_data *gdata, u8 *out,
+                                    const u8 *in, unsigned long plaintext_len);
+asmlinkage void aesni_gcm_dec_update_avx_gen2(void *ctx,
+                                    struct gcm_context_data *gdata, u8 *out,
+                                    const u8 *in,
+                                    unsigned long ciphertext_len);
+asmlinkage void aesni_gcm_finalize_avx_gen2(void *ctx,
+                                  struct gcm_context_data *gdata,
+                                  u8 *auth_tag, unsigned long auth_tag_len);
 
 asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx,
                                struct gcm_context_data *gdata, u8 *out,
@@ -206,55 +243,38 @@ asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx,
                        const u8 *aad, unsigned long aad_len,
                        u8 *auth_tag, unsigned long auth_tag_len);
 
-static void aesni_gcm_enc_avx(void *ctx,
-                       struct gcm_context_data *data, u8 *out,
-                       const u8 *in, unsigned long plaintext_len, u8 *iv,
-                       u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-                       u8 *auth_tag, unsigned long auth_tag_len)
-{
-       if (plaintext_len < AVX_GEN2_OPTSIZE) {
-               aesni_gcm_enc(ctx, data, out, in,
-                       plaintext_len, iv, hash_subkey, aad,
-                       aad_len, auth_tag, auth_tag_len);
-       } else {
-               aesni_gcm_precomp_avx_gen2(ctx, data, hash_subkey, iv,
-                                          aad, aad_len);
-               aesni_gcm_enc_avx_gen2(ctx, data, out, in, plaintext_len, iv,
-                                      aad, aad_len, auth_tag, auth_tag_len);
-       }
-}
+struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen2 = {
+       .init = &aesni_gcm_init_avx_gen2,
+       .enc_update = &aesni_gcm_enc_update_avx_gen2,
+       .dec_update = &aesni_gcm_dec_update_avx_gen2,
+       .finalize = &aesni_gcm_finalize_avx_gen2,
+};
 
-static void aesni_gcm_dec_avx(void *ctx,
-                       struct gcm_context_data *data, u8 *out,
-                       const u8 *in, unsigned long ciphertext_len, u8 *iv,
-                       u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-                       u8 *auth_tag, unsigned long auth_tag_len)
-{
-       if (ciphertext_len < AVX_GEN2_OPTSIZE) {
-               aesni_gcm_dec(ctx, data, out, in,
-                       ciphertext_len, iv, hash_subkey, aad,
-                       aad_len, auth_tag, auth_tag_len);
-       } else {
-               aesni_gcm_precomp_avx_gen2(ctx, data, hash_subkey, iv,
-                                          aad, aad_len);
-               aesni_gcm_dec_avx_gen2(ctx, data, out, in, ciphertext_len, iv,
-                                      aad, aad_len, auth_tag, auth_tag_len);
-       }
-}
 #endif
 
 #ifdef CONFIG_AS_AVX2
 /*
- * asmlinkage void aesni_gcm_precomp_avx_gen4()
+ * asmlinkage void aesni_gcm_init_avx_gen4()
  * gcm_data *my_ctx_data, context data
  * u8 *hash_subkey,  the Hash sub key input. Data starts on a 16-byte boundary.
  */
-asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data,
-                                          struct gcm_context_data *gdata,
-                                          u8 *hash_subkey,
-                                          u8 *iv,
-                                          const u8 *aad,
-                                          unsigned long aad_len);
+asmlinkage void aesni_gcm_init_avx_gen4(void *my_ctx_data,
+                                       struct gcm_context_data *gdata,
+                                       u8 *iv,
+                                       u8 *hash_subkey,
+                                       const u8 *aad,
+                                       unsigned long aad_len);
+
+asmlinkage void aesni_gcm_enc_update_avx_gen4(void *ctx,
+                                    struct gcm_context_data *gdata, u8 *out,
+                                    const u8 *in, unsigned long plaintext_len);
+asmlinkage void aesni_gcm_dec_update_avx_gen4(void *ctx,
+                                    struct gcm_context_data *gdata, u8 *out,
+                                    const u8 *in,
+                                    unsigned long ciphertext_len);
+asmlinkage void aesni_gcm_finalize_avx_gen4(void *ctx,
+                                  struct gcm_context_data *gdata,
+                                  u8 *auth_tag, unsigned long auth_tag_len);
 
 asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx,
                                struct gcm_context_data *gdata, u8 *out,
@@ -268,67 +288,15 @@ asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx,
                        const u8 *aad, unsigned long aad_len,
                        u8 *auth_tag, unsigned long auth_tag_len);
 
-static void aesni_gcm_enc_avx2(void *ctx,
-                       struct gcm_context_data *data, u8 *out,
-                       const u8 *in, unsigned long plaintext_len, u8 *iv,
-                       u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-                       u8 *auth_tag, unsigned long auth_tag_len)
-{
-       if (plaintext_len < AVX_GEN2_OPTSIZE) {
-               aesni_gcm_enc(ctx, data, out, in,
-                             plaintext_len, iv, hash_subkey, aad,
-                             aad_len, auth_tag, auth_tag_len);
-       } else if (plaintext_len < AVX_GEN4_OPTSIZE) {
-               aesni_gcm_precomp_avx_gen2(ctx, data, hash_subkey, iv,
-                                          aad, aad_len);
-               aesni_gcm_enc_avx_gen2(ctx, data, out, in, plaintext_len, iv,
-                                      aad, aad_len, auth_tag, auth_tag_len);
-       } else {
-               aesni_gcm_precomp_avx_gen4(ctx, data, hash_subkey, iv,
-                                          aad, aad_len);
-               aesni_gcm_enc_avx_gen4(ctx, data, out, in, plaintext_len, iv,
-                                      aad, aad_len, auth_tag, auth_tag_len);
-       }
-}
+struct aesni_gcm_tfm_s aesni_gcm_tfm_avx_gen4 = {
+       .init = &aesni_gcm_init_avx_gen4,
+       .enc_update = &aesni_gcm_enc_update_avx_gen4,
+       .dec_update = &aesni_gcm_dec_update_avx_gen4,
+       .finalize = &aesni_gcm_finalize_avx_gen4,
+};
 
-static void aesni_gcm_dec_avx2(void *ctx,
-       struct gcm_context_data *data, u8 *out,
-                       const u8 *in, unsigned long ciphertext_len, u8 *iv,
-                       u8 *hash_subkey, const u8 *aad, unsigned long aad_len,
-                       u8 *auth_tag, unsigned long auth_tag_len)
-{
-       if (ciphertext_len < AVX_GEN2_OPTSIZE) {
-               aesni_gcm_dec(ctx, data, out, in,
-                             ciphertext_len, iv, hash_subkey,
-                             aad, aad_len, auth_tag, auth_tag_len);
-       } else if (ciphertext_len < AVX_GEN4_OPTSIZE) {
-               aesni_gcm_precomp_avx_gen2(ctx, data, hash_subkey, iv,
-                                          aad, aad_len);
-               aesni_gcm_dec_avx_gen2(ctx, data, out, in, ciphertext_len, iv,
-                                      aad, aad_len, auth_tag, auth_tag_len);
-       } else {
-               aesni_gcm_precomp_avx_gen4(ctx, data, hash_subkey, iv,
-                                          aad, aad_len);
-               aesni_gcm_dec_avx_gen4(ctx, data, out, in, ciphertext_len, iv,
-                                      aad, aad_len, auth_tag, auth_tag_len);
-       }
-}
 #endif
 
-static void (*aesni_gcm_enc_tfm)(void *ctx,
-                                struct gcm_context_data *data, u8 *out,
-                                const u8 *in, unsigned long plaintext_len,
-                                u8 *iv, u8 *hash_subkey, const u8 *aad,
-                                unsigned long aad_len, u8 *auth_tag,
-                                unsigned long auth_tag_len);
-
-static void (*aesni_gcm_dec_tfm)(void *ctx,
-                                struct gcm_context_data *data, u8 *out,
-                                const u8 *in, unsigned long ciphertext_len,
-                                u8 *iv, u8 *hash_subkey, const u8 *aad,
-                                unsigned long aad_len, u8 *auth_tag,
-                                unsigned long auth_tag_len);
-
 static inline struct
 aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
 {
@@ -810,6 +778,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 {
        struct crypto_aead *tfm = crypto_aead_reqtfm(req);
        unsigned long auth_tag_len = crypto_aead_authsize(tfm);
+       struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm;
        struct gcm_context_data data AESNI_ALIGN_ATTR;
        struct scatter_walk dst_sg_walk = {};
        unsigned long left = req->cryptlen;
@@ -827,6 +796,15 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
        if (!enc)
                left -= auth_tag_len;
 
+#ifdef CONFIG_AS_AVX2
+       if (left < AVX_GEN4_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen4)
+               gcm_tfm = &aesni_gcm_tfm_avx_gen2;
+#endif
+#ifdef CONFIG_AS_AVX
+       if (left < AVX_GEN2_OPTSIZE && gcm_tfm == &aesni_gcm_tfm_avx_gen2)
+               gcm_tfm = &aesni_gcm_tfm_sse;
+#endif
+
        /* Linearize assoc, if not already linear */
        if (req->src->length >= assoclen && req->src->length &&
                (!PageHighMem(sg_page(req->src)) ||
@@ -851,7 +829,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
        }
 
        kernel_fpu_begin();
-       aesni_gcm_init(aes_ctx, &data, iv,
+       gcm_tfm->init(aes_ctx, &data, iv,
                hash_subkey, assoc, assoclen);
        if (req->src != req->dst) {
                while (left) {
@@ -862,10 +840,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
                        len = min(srclen, dstlen);
                        if (len) {
                                if (enc)
-                                       aesni_gcm_enc_update(aes_ctx, &data,
+                                       gcm_tfm->enc_update(aes_ctx, &data,
                                                             dst, src, len);
                                else
-                                       aesni_gcm_dec_update(aes_ctx, &data,
+                                       gcm_tfm->dec_update(aes_ctx, &data,
                                                             dst, src, len);
                        }
                        left -= len;
@@ -883,10 +861,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
                        len = scatterwalk_clamp(&src_sg_walk, left);
                        if (len) {
                                if (enc)
-                                       aesni_gcm_enc_update(aes_ctx, &data,
+                                       gcm_tfm->enc_update(aes_ctx, &data,
                                                             src, src, len);
                                else
-                                       aesni_gcm_dec_update(aes_ctx, &data,
+                                       gcm_tfm->dec_update(aes_ctx, &data,
                                                             src, src, len);
                        }
                        left -= len;
@@ -895,7 +873,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
                        scatterwalk_done(&src_sg_walk, 1, left);
                }
        }
-       aesni_gcm_finalize(aes_ctx, &data, authTag, auth_tag_len);
+       gcm_tfm->finalize(aes_ctx, &data, authTag, auth_tag_len);
        kernel_fpu_end();
 
        if (!assocmem)
@@ -928,145 +906,15 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req,
 static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
                          u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
-       u8 one_entry_in_sg = 0;
-       u8 *src, *dst, *assoc;
-       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-       unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-       struct scatter_walk src_sg_walk;
-       struct scatter_walk dst_sg_walk = {};
-       struct gcm_context_data data AESNI_ALIGN_ATTR;
-
-       if (aesni_gcm_enc_tfm == aesni_gcm_enc ||
-               req->cryptlen < AVX_GEN2_OPTSIZE) {
-               return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
-                                         aes_ctx);
-       }
-       if (sg_is_last(req->src) &&
-           (!PageHighMem(sg_page(req->src)) ||
-           req->src->offset + req->src->length <= PAGE_SIZE) &&
-           sg_is_last(req->dst) &&
-           (!PageHighMem(sg_page(req->dst)) ||
-           req->dst->offset + req->dst->length <= PAGE_SIZE)) {
-               one_entry_in_sg = 1;
-               scatterwalk_start(&src_sg_walk, req->src);
-               assoc = scatterwalk_map(&src_sg_walk);
-               src = assoc + req->assoclen;
-               dst = src;
-               if (unlikely(req->src != req->dst)) {
-                       scatterwalk_start(&dst_sg_walk, req->dst);
-                       dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
-               }
-       } else {
-               /* Allocate memory for src, dst, assoc */
-               assoc = kmalloc(req->cryptlen + auth_tag_len + req->assoclen,
-                       GFP_ATOMIC);
-               if (unlikely(!assoc))
-                       return -ENOMEM;
-               scatterwalk_map_and_copy(assoc, req->src, 0,
-                                        req->assoclen + req->cryptlen, 0);
-               src = assoc + req->assoclen;
-               dst = src;
-       }
-
-       kernel_fpu_begin();
-       aesni_gcm_enc_tfm(aes_ctx, &data, dst, src, req->cryptlen, iv,
-                         hash_subkey, assoc, assoclen,
-                         dst + req->cryptlen, auth_tag_len);
-       kernel_fpu_end();
-
-       /* The authTag (aka the Integrity Check Value) needs to be written
-        * back to the packet. */
-       if (one_entry_in_sg) {
-               if (unlikely(req->src != req->dst)) {
-                       scatterwalk_unmap(dst - req->assoclen);
-                       scatterwalk_advance(&dst_sg_walk, req->dst->length);
-                       scatterwalk_done(&dst_sg_walk, 1, 0);
-               }
-               scatterwalk_unmap(assoc);
-               scatterwalk_advance(&src_sg_walk, req->src->length);
-               scatterwalk_done(&src_sg_walk, req->src == req->dst, 0);
-       } else {
-               scatterwalk_map_and_copy(dst, req->dst, req->assoclen,
-                                        req->cryptlen + auth_tag_len, 1);
-               kfree(assoc);
-       }
-       return 0;
+       return gcmaes_crypt_by_sg(true, req, assoclen, hash_subkey, iv,
+                               aes_ctx);
 }
 
 static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
                          u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
-       u8 one_entry_in_sg = 0;
-       u8 *src, *dst, *assoc;
-       unsigned long tempCipherLen = 0;
-       struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-       unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-       u8 authTag[16];
-       struct scatter_walk src_sg_walk;
-       struct scatter_walk dst_sg_walk = {};
-       struct gcm_context_data data AESNI_ALIGN_ATTR;
-       int retval = 0;
-
-       if (aesni_gcm_enc_tfm == aesni_gcm_enc ||
-               req->cryptlen < AVX_GEN2_OPTSIZE) {
-               return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
-                                         aes_ctx);
-       }
-       tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
-
-       if (sg_is_last(req->src) &&
-           (!PageHighMem(sg_page(req->src)) ||
-           req->src->offset + req->src->length <= PAGE_SIZE) &&
-           sg_is_last(req->dst) && req->dst->length &&
-           (!PageHighMem(sg_page(req->dst)) ||
-           req->dst->offset + req->dst->length <= PAGE_SIZE)) {
-               one_entry_in_sg = 1;
-               scatterwalk_start(&src_sg_walk, req->src);
-               assoc = scatterwalk_map(&src_sg_walk);
-               src = assoc + req->assoclen;
-               dst = src;
-               if (unlikely(req->src != req->dst)) {
-                       scatterwalk_start(&dst_sg_walk, req->dst);
-                       dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
-               }
-       } else {
-               /* Allocate memory for src, dst, assoc */
-               assoc = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
-               if (!assoc)
-                       return -ENOMEM;
-               scatterwalk_map_and_copy(assoc, req->src, 0,
-                                        req->assoclen + req->cryptlen, 0);
-               src = assoc + req->assoclen;
-               dst = src;
-       }
-
-
-       kernel_fpu_begin();
-       aesni_gcm_dec_tfm(aes_ctx, &data, dst, src, tempCipherLen, iv,
-                         hash_subkey, assoc, assoclen,
-                         authTag, auth_tag_len);
-       kernel_fpu_end();
-
-       /* Compare generated tag with passed in tag. */
-       retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ?
-               -EBADMSG : 0;
-
-       if (one_entry_in_sg) {
-               if (unlikely(req->src != req->dst)) {
-                       scatterwalk_unmap(dst - req->assoclen);
-                       scatterwalk_advance(&dst_sg_walk, req->dst->length);
-                       scatterwalk_done(&dst_sg_walk, 1, 0);
-               }
-               scatterwalk_unmap(assoc);
-               scatterwalk_advance(&src_sg_walk, req->src->length);
-               scatterwalk_done(&src_sg_walk, req->src == req->dst, 0);
-       } else {
-               scatterwalk_map_and_copy(dst, req->dst, req->assoclen,
-                                        tempCipherLen, 1);
-               kfree(assoc);
-       }
-       return retval;
-
+       return gcmaes_crypt_by_sg(false, req, assoclen, hash_subkey, iv,
+                               aes_ctx);
 }
 
 static int helper_rfc4106_encrypt(struct aead_request *req)
@@ -1434,21 +1282,18 @@ static int __init aesni_init(void)
 #ifdef CONFIG_AS_AVX2
        if (boot_cpu_has(X86_FEATURE_AVX2)) {
                pr_info("AVX2 version of gcm_enc/dec engaged.\n");
-               aesni_gcm_enc_tfm = aesni_gcm_enc_avx2;
-               aesni_gcm_dec_tfm = aesni_gcm_dec_avx2;
+               aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen4;
        } else
 #endif
 #ifdef CONFIG_AS_AVX
        if (boot_cpu_has(X86_FEATURE_AVX)) {
                pr_info("AVX version of gcm_enc/dec engaged.\n");
-               aesni_gcm_enc_tfm = aesni_gcm_enc_avx;
-               aesni_gcm_dec_tfm = aesni_gcm_dec_avx;
+               aesni_gcm_tfm = &aesni_gcm_tfm_avx_gen2;
        } else
 #endif
        {
                pr_info("SSE version of gcm_enc/dec engaged.\n");
-               aesni_gcm_enc_tfm = aesni_gcm_enc;
-               aesni_gcm_dec_tfm = aesni_gcm_dec;
+               aesni_gcm_tfm = &aesni_gcm_tfm_sse;
        }
        aesni_ctr_enc_tfm = aesni_ctr_enc;
 #ifdef CONFIG_AS_AVX