s390/fpu: convert __kernel_fpu_begin()/__kernel_fpu_end() to C
author Heiko Carstens <hca@linux.ibm.com>
Sat, 3 Feb 2024 10:45:08 +0000 (11:45 +0100)
committer Heiko Carstens <hca@linux.ibm.com>
Fri, 16 Feb 2024 13:30:15 +0000 (14:30 +0100)
Convert the rather large __kernel_fpu_begin()/__kernel_fpu_end() inline
assemblies to C. The C variant is much more readable, and it also allows
getting rid of the non-obvious usage of the KERNEL_VXR_* constants within
the inline assemblies. E.g. "tmll %[m],6" correlates with the two bits set
in KERNEL_VXR_LOW. If the corresponding defines were changed, the inline
assemblies would break in a subtle way.

Therefore convert to C, use the proper defines, and allow the compiler to
generate code using the (hopefully) most efficient instructions.
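
For illustration, the old magic numbers and the new defines correlate as
follows (a worked sketch derived from the BIT() layout introduced below,
not part of the patch itself):

    KERNEL_FPC        = BIT(0) = 0x01
    KERNEL_VXR_V0V7   = BIT(1) = 0x02
    KERNEL_VXR_V8V15  = BIT(2) = 0x04
    KERNEL_VXR_V16V23 = BIT(3) = 0x08
    KERNEL_VXR_V24V31 = BIT(4) = 0x10

    KERNEL_VXR_LOW  = 0x02 | 0x04 =  6   /* old "tmll %[m],6"  */
    KERNEL_VXR_MID  = 0x04 | 0x08 = 12   /* old "chi  %[m],12" */
    KERNEL_VXR_HIGH = 0x08 | 0x10 = 24   /* old "tmll %[m],24" */
    KERNEL_VXR      = LOW  | HIGH = 30   /* old "tmll %[m],30" */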

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
arch/s390/include/asm/fpu-types.h
arch/s390/include/asm/fpu.h
arch/s390/kernel/fpu.c

index 1caaf31209fc7b58b06d3aabbbf6168a9f7b01e2..743858dbc7fbd194a63e55764e3857946ef0c868 100644
@@ -22,11 +22,6 @@ struct fpu {
        };
 };
 
-/* VX array structure for address operand constraints in inline assemblies */
-struct vx_array {
-       __vector128 _[__NUM_VXRS];
-};
-
 /* In-kernel FPU state structure */
 struct kernel_fpu {
        u32         mask;
index 6a0a23a28ce8616ea61a5a119aa932097205ed0f..be85c28cdcde667b35bb8bb8dea2545c0a07497a 100644
@@ -61,11 +61,19 @@ void save_fpu_regs(void);
 void load_fpu_regs(void);
 void __load_fpu_regs(void);
 
-#define KERNEL_FPC             1
-#define KERNEL_VXR_V0V7                2
-#define KERNEL_VXR_V8V15       4
-#define KERNEL_VXR_V16V23      8
-#define KERNEL_VXR_V24V31      16
+enum {
+       KERNEL_FPC_BIT = 0,
+       KERNEL_VXR_V0V7_BIT,
+       KERNEL_VXR_V8V15_BIT,
+       KERNEL_VXR_V16V23_BIT,
+       KERNEL_VXR_V24V31_BIT,
+};
+
+#define KERNEL_FPC             BIT(KERNEL_FPC_BIT)
+#define KERNEL_VXR_V0V7                BIT(KERNEL_VXR_V0V7_BIT)
+#define KERNEL_VXR_V8V15       BIT(KERNEL_VXR_V8V15_BIT)
+#define KERNEL_VXR_V16V23      BIT(KERNEL_VXR_V16V23_BIT)
+#define KERNEL_VXR_V24V31      BIT(KERNEL_VXR_V24V31_BIT)
 
 #define KERNEL_VXR_LOW         (KERNEL_VXR_V0V7   | KERNEL_VXR_V8V15)
 #define KERNEL_VXR_MID         (KERNEL_VXR_V8V15  | KERNEL_VXR_V16V23)
index 092a4bdf88edc29fa6fa262e70c0cd77840bad73..0a31408a46f3a030848183f22ec0d655457a6c61 100644
@@ -12,6 +12,9 @@
 
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
+       __vector128 *vxrs = state->vxrs;
+       u32 mask;
+
        /*
         * Limit the save to the FPU/vector registers already
         * in use by the previous context.
@@ -24,54 +27,42 @@ void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
                        save_fp_regs(state->fprs);
                return;
        }
-       /* Test and save vector registers */
-       asm volatile (
-               /*
-                * Test if any vector register must be saved and, if so,
-                * test if all register can be saved.
-                */
-               "       la      1,%[vxrs]\n"    /* load save area */
-               "       tmll    %[m],30\n"      /* KERNEL_VXR */
-               "       jz      7f\n"           /* no work -> done */
-               "       jo      5f\n"           /* -> save V0..V31 */
-               /*
-                * Test for special case KERNEL_FPU_MID only. In this
-                * case a vstm V8..V23 is the best instruction
-                */
-               "       chi     %[m],12\n"      /* KERNEL_VXR_MID */
-               "       jne     0f\n"           /* -> save V8..V23 */
-               "       VSTM    8,23,128,1\n"   /* vstm %v8,%v23,128(%r1) */
-               "       j       7f\n"
-               /* Test and save the first half of 16 vector registers */
-               "0:     tmll    %[m],6\n"       /* KERNEL_VXR_LOW */
-               "       jz      3f\n"           /* -> KERNEL_VXR_HIGH */
-               "       jo      2f\n"           /* 11 -> save V0..V15 */
-               "       brc     2,1f\n"         /* 10 -> save V8..V15 */
-               "       VSTM    0,7,0,1\n"      /* vstm %v0,%v7,0(%r1) */
-               "       j       3f\n"
-               "1:     VSTM    8,15,128,1\n"   /* vstm %v8,%v15,128(%r1) */
-               "       j       3f\n"
-               "2:     VSTM    0,15,0,1\n"     /* vstm %v0,%v15,0(%r1) */
-               /* Test and save the second half of 16 vector registers */
-               "3:     tmll    %[m],24\n"      /* KERNEL_VXR_HIGH */
-               "       jz      7f\n"
-               "       jo      6f\n"           /* 11 -> save V16..V31 */
-               "       brc     2,4f\n"         /* 10 -> save V24..V31 */
-               "       VSTM    16,23,256,1\n"  /* vstm %v16,%v23,256(%r1) */
-               "       j       7f\n"
-               "4:     VSTM    24,31,384,1\n"  /* vstm %v24,%v31,384(%r1) */
-               "       j       7f\n"
-               "5:     VSTM    0,15,0,1\n"     /* vstm %v0,%v15,0(%r1) */
-               "6:     VSTM    16,31,256,1\n"  /* vstm %v16,%v31,256(%r1) */
-               "7:"
-               : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
-               : [m] "d" (flags)
-               : "1", "cc");
+       mask = flags & KERNEL_VXR;
+       if (mask == KERNEL_VXR) {
+               fpu_vstm(0, 15, &vxrs[0]);
+               fpu_vstm(16, 31, &vxrs[16]);
+               return;
+       }
+       if (mask == KERNEL_VXR_MID) {
+               fpu_vstm(8, 23, &vxrs[8]);
+               return;
+       }
+       mask = flags & KERNEL_VXR_LOW;
+       if (mask) {
+               if (mask == KERNEL_VXR_LOW)
+                       fpu_vstm(0, 15, &vxrs[0]);
+               else if (mask == KERNEL_VXR_V0V7)
+                       fpu_vstm(0, 7, &vxrs[0]);
+               else
+                       fpu_vstm(8, 15, &vxrs[8]);
+       }
+       mask = flags & KERNEL_VXR_HIGH;
+       if (mask) {
+               if (mask == KERNEL_VXR_HIGH)
+                       fpu_vstm(16, 31, &vxrs[16]);
+               else if (mask == KERNEL_VXR_V16V23)
+                       fpu_vstm(16, 23, &vxrs[16]);
+               else
+                       fpu_vstm(24, 31, &vxrs[24]);
+       }
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
 
 void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
+       __vector128 *vxrs = state->vxrs;
+       u32 mask;
+
        /*
         * Limit the restore to the FPU/vector registers of the
         * previous context that have been overwritten by the
@@ -85,49 +76,34 @@ void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
                        load_fp_regs(state->fprs);
                return;
        }
-       /* Test and restore (load) vector registers */
-       asm volatile (
-               /*
-                * Test if any vector register must be loaded and, if so,
-                * test if all registers can be loaded at once.
-                */
-               "       la      1,%[vxrs]\n"    /* load restore area */
-               "       tmll    %[m],30\n"      /* KERNEL_VXR */
-               "       jz      7f\n"           /* no work -> done */
-               "       jo      5f\n"           /* -> restore V0..V31 */
-               /*
-                * Test for special case KERNEL_FPU_MID only. In this
-                * case a vlm V8..V23 is the best instruction
-                */
-               "       chi     %[m],12\n"      /* KERNEL_VXR_MID */
-               "       jne     0f\n"           /* -> restore V8..V23 */
-               "       VLM     8,23,128,1\n"   /* vlm %v8,%v23,128(%r1) */
-               "       j       7f\n"
-               /* Test and restore the first half of 16 vector registers */
-               "0:     tmll    %[m],6\n"       /* KERNEL_VXR_LOW */
-               "       jz      3f\n"           /* -> KERNEL_VXR_HIGH */
-               "       jo      2f\n"           /* 11 -> restore V0..V15 */
-               "       brc     2,1f\n"         /* 10 -> restore V8..V15 */
-               "       VLM     0,7,0,1\n"      /* vlm %v0,%v7,0(%r1) */
-               "       j       3f\n"
-               "1:     VLM     8,15,128,1\n"   /* vlm %v8,%v15,128(%r1) */
-               "       j       3f\n"
-               "2:     VLM     0,15,0,1\n"     /* vlm %v0,%v15,0(%r1) */
-               /* Test and restore the second half of 16 vector registers */
-               "3:     tmll    %[m],24\n"      /* KERNEL_VXR_HIGH */
-               "       jz      7f\n"
-               "       jo      6f\n"           /* 11 -> restore V16..V31 */
-               "       brc     2,4f\n"         /* 10 -> restore V24..V31 */
-               "       VLM     16,23,256,1\n"  /* vlm %v16,%v23,256(%r1) */
-               "       j       7f\n"
-               "4:     VLM     24,31,384,1\n"  /* vlm %v24,%v31,384(%r1) */
-               "       j       7f\n"
-               "5:     VLM     0,15,0,1\n"     /* vlm %v0,%v15,0(%r1) */
-               "6:     VLM     16,31,256,1\n"  /* vlm %v16,%v31,256(%r1) */
-               "7:"
-               : [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
-               : [m] "d" (flags)
-               : "1", "cc");
+       mask = flags & KERNEL_VXR;
+       if (mask == KERNEL_VXR) {
+               fpu_vlm(0, 15, &vxrs[0]);
+               fpu_vlm(16, 31, &vxrs[16]);
+               return;
+       }
+       if (mask == KERNEL_VXR_MID) {
+               fpu_vlm(8, 23, &vxrs[8]);
+               return;
+       }
+       mask = flags & KERNEL_VXR_LOW;
+       if (mask) {
+               if (mask == KERNEL_VXR_LOW)
+                       fpu_vlm(0, 15, &vxrs[0]);
+               else if (mask == KERNEL_VXR_V0V7)
+                       fpu_vlm(0, 7, &vxrs[0]);
+               else
+                       fpu_vlm(8, 15, &vxrs[8]);
+       }
+       mask = flags & KERNEL_VXR_HIGH;
+       if (mask) {
+               if (mask == KERNEL_VXR_HIGH)
+                       fpu_vlm(16, 31, &vxrs[16]);
+               else if (mask == KERNEL_VXR_V16V23)
+                       fpu_vlm(16, 23, &vxrs[16]);
+               else
+                       fpu_vlm(24, 31, &vxrs[24]);
+       }
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
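
As a minimal usage sketch (assuming the existing kernel_fpu_begin()/
kernel_fpu_end() wrappers, which are not shown in this diff and hand the
flags down to the functions above): a caller that only clobbers V0..V15
requests KERNEL_VXR_LOW, so __kernel_fpu_begin() ends up doing a single
fpu_vstm(0, 15, ...) and __kernel_fpu_end() a single fpu_vlm(0, 15, ...):

        struct kernel_fpu vxstate;

        kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);     /* save V0..V15 */
        /* ... use vector registers V0..V15 ... */
        kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);       /* restore V0..V15 */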