arm64/sve: Skip flushing Z registers with 128 bit vectors
author Mark Brown <broonie@kernel.org>
Wed, 12 May 2021 15:11:31 +0000 (16:11 +0100)
committer Will Deacon <will@kernel.org>
Wed, 26 May 2021 19:04:28 +0000 (20:04 +0100)
When the SVE vector length is 128 bits, there are no bits in the Z
registers which are not shared with the V registers, so we can skip them
when zeroing state not shared with FPSIMD. This results in a minor
performance improvement.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://lore.kernel.org/r/20210512151131.27877-4-broonie@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
arch/arm64/include/asm/fpsimd.h
arch/arm64/kernel/entry-fpsimd.S
arch/arm64/kernel/fpsimd.c

index 2599504674b52fa15efd6c08007f0d54b353dfa6..c072161d5c65377a96d505c4d055960586fe96f8 100644 (file)
@@ -69,7 +69,7 @@ static inline void *sve_pffr(struct thread_struct *thread)
 extern void sve_save_state(void *state, u32 *pfpsr);
 extern void sve_load_state(void const *state, u32 const *pfpsr,
                           unsigned long vq_minus_1);
-extern void sve_flush_live(void);
+extern void sve_flush_live(unsigned long vq_minus_1);
 extern void sve_load_from_fpsimd_state(struct user_fpsimd_state const *state,
                                       unsigned long vq_minus_1);
 extern unsigned int sve_get_vl(void);
index dd8382e5ce82536dda50dedac277095ed1ce5725..0a7a647538787ea196ba47c7024ca1316b25ebb2 100644 (file)
@@ -69,10 +69,18 @@ SYM_FUNC_START(sve_load_from_fpsimd_state)
        ret
 SYM_FUNC_END(sve_load_from_fpsimd_state)
 
-/* Zero all SVE registers but the first 128-bits of each vector */
+/*
+ * Zero all SVE registers but the first 128-bits of each vector
+ *
+ * VQ must already be configured by caller, any further updates of VQ
+ * will need to ensure that the register state remains valid.
+ *
+ * x0 = VQ - 1
+ */
 SYM_FUNC_START(sve_flush_live)
+       cbz             x0, 1f  // A VQ-1 of 0 is 128 bits so no extra Z state
        sve_flush_z
-       sve_flush_p_ffr
+1:     sve_flush_p_ffr
        ret
 SYM_FUNC_END(sve_flush_live)
 
index ad3dd34a83cf98db4593c4a406728aecd75bc96e..e57b23f952846a1dd59df94821752f24359986c0 100644 (file)
@@ -957,8 +957,10 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
         * disabling the trap, otherwise update our in-memory copy.
         */
        if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
-               sve_set_vq(sve_vq_from_vl(current->thread.sve_vl) - 1);
-               sve_flush_live();
+               unsigned long vq_minus_one =
+                       sve_vq_from_vl(current->thread.sve_vl) - 1;
+               sve_set_vq(vq_minus_one);
+               sve_flush_live(vq_minus_one);
                fpsimd_bind_task_to_cpu();
        } else {
                fpsimd_to_sve(current);