s390/fpu: decrease stack usage for some cases

author Heiko Carstens <hca@linux.ibm.com>

Sat, 3 Feb 2024 10:45:17 +0000 (11:45 +0100)

committer Heiko Carstens <hca@linux.ibm.com>

Fri, 16 Feb 2024 13:30:16 +0000 (14:30 +0100)
author Heiko Carstens <hca@linux.ibm.com>
Sat, 3 Feb 2024 10:45:17 +0000 (11:45 +0100)
committer Heiko Carstens <hca@linux.ibm.com>
Fri, 16 Feb 2024 13:30:16 +0000 (14:30 +0100)
diff --git a/arch/s390/crypto/chacha-glue.c b/arch/s390/crypto/chacha-glue.c

index 97098add2079136267e61f7a3eebf6f488de4240..f8b0c52e77a4fab95de18f8cda80e098b4fe5ee1 100644 (file)
--- a/arch/s390/crypto/chacha-glue.c
+++ b/arch/s390/crypto/chacha-glue.c
@@ -22,7 +22,7 @@ static void chacha20_crypt_s390(u32 *state, u8 *dst, const u8 *src,
                                 unsigned int nbytes, const u32 *key,
                                 u32 *counter)
  {
-       DECLARE_KERNEL_FPU_ONSTACK(vxstate);
+       DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
  
         kernel_fpu_begin(&vxstate, KERNEL_VXR);
         chacha20_vx(dst, src, nbytes, key, counter);
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c

index dc2997f18e30593245577aa510220bfc0c4024f0..d9f1fdb66691bb4b4c09d406d64f786d08cf47e8 100644 (file)
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -50,7 +50,7 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size);
                                 unsigned char const *data, size_t datalen)  \
         {                                                                   \
                 unsigned long prealign, aligned, remaining;                 \
-               DECLARE_KERNEL_FPU_ONSTACK(vxstate);                        \
+               DECLARE_KERNEL_FPU_ONSTACK16(vxstate);                      \
                                                                             \
                 if (datalen < VX_MIN_LEN + VX_ALIGN_MASK)                   \
                         return ___crc32_sw(crc, data, datalen);             \
diff --git a/arch/s390/include/asm/fpu-types.h b/arch/s390/include/asm/fpu-types.h

index 8e6927c23bdc4f0be4820b2d195d21fe79c095b3..04c32b9fc849862137c51763ebe198879da6710e 100644 (file)
--- a/arch/s390/include/asm/fpu-types.h
+++ b/arch/s390/include/asm/fpu-types.h
@@ -16,14 +16,32 @@ struct fpu {
         __vector128 vxrs[__NUM_VXRS] __aligned(8);
  };
  
-/* In-kernel FPU state structure */
+struct kernel_fpu_hdr {
+       int     mask;   /* copy of current->thread.kfpu_flags taken in _kernel_fpu_begin() */
+       u32     fpc;    /* stored via fpu_stfpc() / reloaded via fpu_lfpc() when KERNEL_FPC is set */
+};
+
  struct kernel_fpu {
-       int         mask;
-       u32         fpc;
-       __vector128 vxrs[__NUM_VXRS] __aligned(8);
+       struct kernel_fpu_hdr hdr;
+       __vector128 vxrs[] __aligned(8);
  };
  
-#define DECLARE_KERNEL_FPU_ONSTACK(name)       \
-       struct kernel_fpu name __uninitialized
+#define KERNEL_FPU_STRUCT(vxr_size)                            \
+struct kernel_fpu_##vxr_size {                                 \
+       struct kernel_fpu_hdr hdr;                              \
+       __vector128 vxrs[vxr_size] __aligned(8);                \
+}
+
+KERNEL_FPU_STRUCT(16); /* struct kernel_fpu_16: hdr + vxrs[16] */
+KERNEL_FPU_STRUCT(32); /* struct kernel_fpu_32: hdr + vxrs[32] */
+
+#define DECLARE_KERNEL_FPU_ONSTACK(vxr_size, name)             \
+       struct kernel_fpu_##vxr_size name __uninitialized
+
+#define DECLARE_KERNEL_FPU_ONSTACK16(name)                     \
+       DECLARE_KERNEL_FPU_ONSTACK(16, name)
+
+#define DECLARE_KERNEL_FPU_ONSTACK32(name)                     \
+       DECLARE_KERNEL_FPU_ONSTACK(32, name)
  
  #endif /* _ASM_S390_FPU_TYPES_H */
diff --git a/arch/s390/include/asm/fpu.h b/arch/s390/include/asm/fpu.h

index e706af26c5d07fa84a333b36239d609f0df66f89..c1b3920092a1c96127b06ce3a4448f2709436c5f 100644 (file)
--- a/arch/s390/include/asm/fpu.h
+++ b/arch/s390/include/asm/fpu.h
@@ -162,28 +162,64 @@ static __always_inline void load_fp_regs_vx(__vector128 *vxrs)
         __load_fp_regs(fprs, sizeof(__vector128) / sizeof(freg_t));
  }
  
-static inline void kernel_fpu_begin(struct kernel_fpu *state, int flags)
+static inline void _kernel_fpu_begin(struct kernel_fpu *state, int flags)
  {
-       state->mask = READ_ONCE(current->thread.kfpu_flags);
+       state->hdr.mask = READ_ONCE(current->thread.kfpu_flags);
         if (!test_thread_flag(TIF_FPU)) {
                 /* Save user space FPU state and register contents */
                 save_user_fpu_regs();
-       } else if (state->mask & flags) {
+       } else if (state->hdr.mask & flags) {
                 /* Save FPU/vector register in-use by the kernel */
                 __kernel_fpu_begin(state, flags);
         }
         __atomic_or(flags, &current->thread.kfpu_flags);
  }
  
-static inline void kernel_fpu_end(struct kernel_fpu *state, int flags)
+static inline void _kernel_fpu_end(struct kernel_fpu *state, int flags)
  {
-       WRITE_ONCE(current->thread.kfpu_flags, state->mask);
-       if (state->mask & flags) {
+       WRITE_ONCE(current->thread.kfpu_flags, state->hdr.mask);
+       if (state->hdr.mask & flags) {
                 /* Restore FPU/vector register in-use by the kernel */
                 __kernel_fpu_end(state, flags);
         }
  }
  
+void __kernel_fpu_invalid_size(void); /* called on size mismatch; no definition in the hunks shown */
+
+static __always_inline void kernel_fpu_check_size(int flags, unsigned int size)
+{
+       unsigned int cnt = 0; /* number of vector registers selected by flags */
+
+       if (flags & KERNEL_VXR_V0V7)
+               cnt += 8;
+       if (flags & KERNEL_VXR_V8V15)
+               cnt += 8;
+       if (flags & KERNEL_VXR_V16V23)
+               cnt += 8;
+       if (flags & KERNEL_VXR_V24V31)
+               cnt += 8;
+       if (cnt != size) /* callers pass ARRAY_SIZE() of the save area's vxrs[] */
+               __kernel_fpu_invalid_size();
+}
+
+#define kernel_fpu_begin(state, flags)                                 \
+{                                                                      \
+       typeof(state) s = (state);                                      \
+       int _flags = (flags);                                           \
+                                                                       \
+       kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs));             \
+       _kernel_fpu_begin((struct kernel_fpu *)s, _flags);              \
+}
+
+#define kernel_fpu_end(state, flags)                                   \
+{                                                                      \
+       typeof(state) s = (state);                                      \
+       int _flags = (flags);                                           \
+                                                                       \
+       kernel_fpu_check_size(_flags, ARRAY_SIZE(s->vxrs));             \
+       _kernel_fpu_end((struct kernel_fpu *)s, _flags);                \
+}
+
  static inline void save_kernel_fpu_regs(struct thread_struct *thread)
  {
         struct fpu *state = &thread->kfpu;
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c

index 733e188951b78ed529becee21f47e80d38e3aae5..62e9befe7890a2806d0515ad3660eddde6d7946c 100644 (file)
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -19,41 +19,41 @@ void __kernel_fpu_begin(struct kernel_fpu *state, int flags)
          * Limit the save to the FPU/vector registers already
          * in use by the previous context.
          */
-       flags &= state->mask;
+       flags &= state->hdr.mask;
         if (flags & KERNEL_FPC)
-               fpu_stfpc(&state->fpc);
+               fpu_stfpc(&state->hdr.fpc);
         if (!cpu_has_vx()) {
                 if (flags & KERNEL_VXR_LOW)
-                       save_fp_regs_vx(state->vxrs);
+                       save_fp_regs_vx(vxrs);
                 return;
         }
         mask = flags & KERNEL_VXR;
         if (mask == KERNEL_VXR) {
-               fpu_vstm(0, 15, &vxrs[0]);
-               fpu_vstm(16, 31, &vxrs[16]);
+               vxrs += fpu_vstm(0, 15, vxrs);
+               vxrs += fpu_vstm(16, 31, vxrs);
                 return;
         }
         if (mask == KERNEL_VXR_MID) {
-               fpu_vstm(8, 23, &vxrs[8]);
+               vxrs += fpu_vstm(8, 23, vxrs);
                 return;
         }
         mask = flags & KERNEL_VXR_LOW;
         if (mask) {
                 if (mask == KERNEL_VXR_LOW)
-                       fpu_vstm(0, 15, &vxrs[0]);
+                       vxrs += fpu_vstm(0, 15, vxrs);
                 else if (mask == KERNEL_VXR_V0V7)
-                       fpu_vstm(0, 7, &vxrs[0]);
+                       vxrs += fpu_vstm(0, 7, vxrs);
                 else
-                       fpu_vstm(8, 15, &vxrs[8]);
+                       vxrs += fpu_vstm(8, 15, vxrs);
         }
         mask = flags & KERNEL_VXR_HIGH;
         if (mask) {
                 if (mask == KERNEL_VXR_HIGH)
-                       fpu_vstm(16, 31, &vxrs[16]);
+                       vxrs += fpu_vstm(16, 31, vxrs);
                 else if (mask == KERNEL_VXR_V16V23)
-                       fpu_vstm(16, 23, &vxrs[16]);
+                       vxrs += fpu_vstm(16, 23, vxrs);
                 else
-                       fpu_vstm(24, 31, &vxrs[24]);
+                       vxrs += fpu_vstm(24, 31, vxrs);
         }
  }
  EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -68,41 +68,41 @@ void __kernel_fpu_end(struct kernel_fpu *state, int flags)
          * previous context that have been overwritten by the
          * current context.
          */
-       flags &= state->mask;
+       flags &= state->hdr.mask;
         if (flags & KERNEL_FPC)
-               fpu_lfpc(&state->fpc);
+               fpu_lfpc(&state->hdr.fpc);
         if (!cpu_has_vx()) {
                 if (flags & KERNEL_VXR_LOW)
-                       load_fp_regs_vx(state->vxrs);
+                       load_fp_regs_vx(vxrs);
                 return;
         }
         mask = flags & KERNEL_VXR;
         if (mask == KERNEL_VXR) {
-               fpu_vlm(0, 15, &vxrs[0]);
-               fpu_vlm(16, 31, &vxrs[16]);
+               vxrs += fpu_vlm(0, 15, vxrs);
+               vxrs += fpu_vlm(16, 31, vxrs);
                 return;
         }
         if (mask == KERNEL_VXR_MID) {
-               fpu_vlm(8, 23, &vxrs[8]);
+               vxrs += fpu_vlm(8, 23, vxrs);
                 return;
         }
         mask = flags & KERNEL_VXR_LOW;
         if (mask) {
                 if (mask == KERNEL_VXR_LOW)
-                       fpu_vlm(0, 15, &vxrs[0]);
+                       vxrs += fpu_vlm(0, 15, vxrs);
                 else if (mask == KERNEL_VXR_V0V7)
-                       fpu_vlm(0, 7, &vxrs[0]);
+                       vxrs += fpu_vlm(0, 7, vxrs);
                 else
-                       fpu_vlm(8, 15, &vxrs[8]);
+                       vxrs += fpu_vlm(8, 15, vxrs);
         }
         mask = flags & KERNEL_VXR_HIGH;
         if (mask) {
                 if (mask == KERNEL_VXR_HIGH)
-                       fpu_vlm(16, 31, &vxrs[16]);
+                       vxrs += fpu_vlm(16, 31, vxrs);
                 else if (mask == KERNEL_VXR_V16V23)
-                       fpu_vlm(16, 23, &vxrs[16]);
+                       vxrs += fpu_vlm(16, 23, vxrs);
                 else
-                       fpu_vlm(24, 31, &vxrs[24]);
+                       vxrs += fpu_vlm(24, 31, vxrs);
         }
  }
  EXPORT_SYMBOL(__kernel_fpu_end);
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c

index 061d45cf02618d5a8434f121c7a543c9bdf34f3e..4cd6428bfab2a497d20c528ac176fc3a69fce1e7 100644 (file)
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -426,7 +426,7 @@ subsys_initcall(create_proc_service_level);
   */
  void s390_adjust_jiffies(void)
  {
-       DECLARE_KERNEL_FPU_ONSTACK(fpu);
+       DECLARE_KERNEL_FPU_ONSTACK16(fpu);
         struct sysinfo_1_2_2 *info;
         unsigned long capability;
  
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c

index 8467945344b52c48d2cc5f0cf4775ec99c0b8923..8c222b0dfbf2e7e68a953fd95fbee3a92cf84ae1 100644 (file)
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5026,7 +5026,7 @@ static void store_regs(struct kvm_vcpu *vcpu)
  int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
  {
         struct kvm_run *kvm_run = vcpu->run;
-       DECLARE_KERNEL_FPU_ONSTACK(fpu);
+       DECLARE_KERNEL_FPU_ONSTACK32(fpu);
         int rc;
  
         /*
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc

index bc2f4fbe5a8280107068a82e1f1741dc22173b1f..92c05b7596bcb4e4289635cd0da5f6b55622e9d8 100644 (file)
--- a/lib/raid6/s390vx.uc
+++ b/lib/raid6/s390vx.uc
@@ -80,7 +80,7 @@ static inline void COPY_VEC(int x, int y)
  
  static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
  {
-       DECLARE_KERNEL_FPU_ONSTACK(vxstate);
+       DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
         u8 **dptr, *p, *q;
         int d, z, z0;
  
@@ -113,7 +113,7 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
  static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
                                         size_t bytes, void **ptrs)
  {
-       DECLARE_KERNEL_FPU_ONSTACK(vxstate);
+       DECLARE_KERNEL_FPU_ONSTACK32(vxstate);
         u8 **dptr, *p, *q;
         int d, z, z0;
author	Heiko Carstens <hca@linux.ibm.com>
	Sat, 3 Feb 2024 10:45:17 +0000 (11:45 +0100)
committer	Heiko Carstens <hca@linux.ibm.com>
	Fri, 16 Feb 2024 13:30:16 +0000 (14:30 +0100)
arch/s390/crypto/chacha-glue.c		patch \| blob \| blame \| history
arch/s390/crypto/crc32-vx.c		patch \| blob \| blame \| history
arch/s390/include/asm/fpu-types.h		patch \| blob \| blame \| history
arch/s390/include/asm/fpu.h		patch \| blob \| blame \| history
arch/s390/kernel/fpu.c		patch \| blob \| blame \| history
arch/s390/kernel/sysinfo.c		patch \| blob \| blame \| history
arch/s390/kvm/kvm-s390.c		patch \| blob \| blame \| history
lib/raid6/s390vx.uc		patch \| blob \| blame \| history