Commit | Line | Data |
---|---|---|
caab277b | 1 | // SPDX-License-Identifier: GPL-2.0-only |
53631b54 CM |
2 | /* |
3 | * FP/SIMD context switching and fault handling | |
4 | * | |
5 | * Copyright (C) 2012 ARM Ltd. | |
6 | * Author: Catalin Marinas <catalin.marinas@arm.com> | |
53631b54 CM |
7 | */ |
8 | ||
7582e220 | 9 | #include <linux/bitmap.h> |
d06b76be | 10 | #include <linux/bitops.h> |
cb84d11e | 11 | #include <linux/bottom_half.h> |
bc0ee476 | 12 | #include <linux/bug.h> |
7582e220 | 13 | #include <linux/cache.h> |
bc0ee476 | 14 | #include <linux/compat.h> |
1e570f51 | 15 | #include <linux/compiler.h> |
32365e64 | 16 | #include <linux/cpu.h> |
fb1ab1ab | 17 | #include <linux/cpu_pm.h> |
97bcbee4 | 18 | #include <linux/ctype.h> |
53631b54 | 19 | #include <linux/kernel.h> |
94ef7ecb | 20 | #include <linux/linkage.h> |
bc0ee476 | 21 | #include <linux/irqflags.h> |
53631b54 | 22 | #include <linux/init.h> |
cb84d11e | 23 | #include <linux/percpu.h> |
2d2123bc | 24 | #include <linux/prctl.h> |
4328825d | 25 | #include <linux/preempt.h> |
bc0ee476 | 26 | #include <linux/ptrace.h> |
3f07c014 | 27 | #include <linux/sched/signal.h> |
bc0ee476 | 28 | #include <linux/sched/task_stack.h> |
53631b54 | 29 | #include <linux/signal.h> |
bc0ee476 | 30 | #include <linux/slab.h> |
31dc52b3 | 31 | #include <linux/stddef.h> |
4ffa09a9 | 32 | #include <linux/sysctl.h> |
41040cf7 | 33 | #include <linux/swab.h> |
53631b54 | 34 | |
af4a81b9 | 35 | #include <asm/esr.h> |
c6b90d5c | 36 | #include <asm/exception.h> |
53631b54 | 37 | #include <asm/fpsimd.h> |
c0cda3b8 | 38 | #include <asm/cpufeature.h> |
53631b54 | 39 | #include <asm/cputype.h> |
c6b90d5c | 40 | #include <asm/neon.h> |
2cf97d46 | 41 | #include <asm/processor.h> |
4328825d | 42 | #include <asm/simd.h> |
bc0ee476 DM |
43 | #include <asm/sigcontext.h> |
44 | #include <asm/sysreg.h> | |
45 | #include <asm/traps.h> | |
d06b76be | 46 | #include <asm/virt.h> |
53631b54 CM |
47 | |
48 | #define FPEXC_IOF (1 << 0) | |
49 | #define FPEXC_DZF (1 << 1) | |
50 | #define FPEXC_OFF (1 << 2) | |
51 | #define FPEXC_UFF (1 << 3) | |
52 | #define FPEXC_IXF (1 << 4) | |
53 | #define FPEXC_IDF (1 << 7) | |
54 | ||
005f78cd | 55 | /* |
bc0ee476 DM |
56 | * (Note: in this discussion, statements about FPSIMD apply equally to SVE.) |
57 | * | |
005f78cd AB |
58 | * In order to reduce the number of times the FPSIMD state is needlessly saved |
59 | * and restored, we need to keep track of two things: | |
60 | * (a) for each task, we need to remember which CPU was the last one to have | |
61 | * the task's FPSIMD state loaded into its FPSIMD registers; | |
62 | * (b) for each CPU, we need to remember which task's userland FPSIMD state has | |
63 | * been loaded into its FPSIMD registers most recently, or whether it has | |
64 | * been used to perform kernel mode NEON in the meantime. | |
65 | * | |
20b85472 | 66 | * For (a), we add a fpsimd_cpu field to thread_struct, which gets updated to |
ef769e32 | 67 | * the id of the current CPU every time the state is loaded onto a CPU. For (b), |
005f78cd AB |
68 | * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the |
69 | * address of the userland FPSIMD state of the task that was loaded onto the CPU | |
70 | * the most recently, or NULL if kernel mode NEON has been performed after that. | |
71 | * | |
72 | * With this in place, we no longer have to restore the next FPSIMD state right | |
73 | * when switching between tasks. Instead, we can defer this check to userland | |
74 | * resume, at which time we verify whether the CPU's fpsimd_last_state and the | |
20b85472 | 75 | * task's fpsimd_cpu are still mutually in sync. If this is the case, we |
005f78cd AB |
76 | * can omit the FPSIMD restore. |
77 | * | |
78 | * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to | |
79 | * indicate whether or not the userland FPSIMD state of the current task is | |
80 | * present in the registers. The flag is set unless the FPSIMD registers of this | |
81 | * CPU currently contain the most recent userland FPSIMD state of the current | |
31aa126d MZ |
82 | * task. If the task is behaving as a VMM, then this will be managed by |
83 | * KVM which will clear it to indicate that the vcpu FPSIMD state is currently | |
84 | * loaded on the CPU, allowing the state to be saved if a FPSIMD-aware | |
85 | * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and | |
86 | * flag the register state as invalid. | |
005f78cd | 87 | * |
cb84d11e DM |
88 | * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may |
89 | * save the task's FPSIMD context back to task_struct from softirq context. | |
90 | * To prevent this from racing with the manipulation of the task's FPSIMD state | |
91 | * from task context and thereby corrupting the state, it is necessary to | |
92 | * protect any manipulation of a task's fpsimd_state or TIF_FOREIGN_FPSTATE | |
6dcdefcd JG |
93 | * flag with {, __}get_cpu_fpsimd_context(). This will still allow softirqs to |
94 | * run but prevent them from using FPSIMD. |
cb84d11e | 95 | * |
005f78cd | 96 | * For a certain task, the sequence may look something like this: |
20b85472 | 97 | * - the task gets scheduled in; if both the task's fpsimd_cpu field |
005f78cd AB |
98 | * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu |
99 | * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is | |
100 | * cleared, otherwise it is set; | |
101 | * | |
102 | * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's | |
103 | * userland FPSIMD state is copied from memory to the registers, the task's | |
20b85472 | 104 | * fpsimd_cpu field is set to the id of the current CPU, the current |
005f78cd AB |
105 | * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the |
106 | * TIF_FOREIGN_FPSTATE flag is cleared; | |
107 | * | |
108 | * - the task executes an ordinary syscall; upon return to userland, the | |
109 | * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is | |
110 | * restored; | |
111 | * | |
112 | * - the task executes a syscall which executes some NEON instructions; this is | |
113 | * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD | |
114 | * register contents to memory, clears the fpsimd_last_state per-cpu variable | |
115 | * and sets the TIF_FOREIGN_FPSTATE flag; | |
116 | * | |
117 | * - the task gets preempted after kernel_neon_end() is called; as we have not | |
118 | * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so | |
119 | * whatever is in the FPSIMD registers is not saved to memory, but discarded. | |
120 | */ | |
cb968afc | 121 | struct fpsimd_last_state_struct { |
20b85472 | 122 | struct user_fpsimd_state *st; |
04950674 | 123 | void *sve_state; |
0033cd93 | 124 | void *za_state; |
b40c559b | 125 | u64 *svcr; |
04950674 | 126 | unsigned int sve_vl; |
af7167d6 | 127 | unsigned int sme_vl; |
cb968afc DM |
128 | }; |
129 | ||
130 | static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state); | |
005f78cd | 131 | |
b5bc00ff MB |
132 | __ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = { |
133 | #ifdef CONFIG_ARM64_SVE | |
134 | [ARM64_VEC_SVE] = { | |
135 | .type = ARM64_VEC_SVE, | |
136 | .name = "SVE", | |
137 | .min_vl = SVE_VL_MIN, | |
138 | .max_vl = SVE_VL_MIN, | |
139 | .max_virtualisable_vl = SVE_VL_MIN, | |
140 | }, | |
141 | #endif | |
b42990d3 MB |
142 | #ifdef CONFIG_ARM64_SME |
143 | [ARM64_VEC_SME] = { | |
144 | .type = ARM64_VEC_SME, | |
145 | .name = "SME", | |
146 | }, | |
147 | #endif | |
b5bc00ff MB |
148 | }; |
149 | ||
5838a155 MB |
150 | static unsigned int vec_vl_inherit_flag(enum vec_type type) |
151 | { | |
152 | switch (type) { | |
153 | case ARM64_VEC_SVE: | |
154 | return TIF_SVE_VL_INHERIT; | |
9e4ab6c8 MB |
155 | case ARM64_VEC_SME: |
156 | return TIF_SME_VL_INHERIT; | |
5838a155 MB |
157 | default: |
158 | WARN_ON_ONCE(1); | |
159 | return 0; | |
160 | } | |
161 | } | |
162 | ||
b5bc00ff MB |
163 | struct vl_config { |
164 | int __default_vl; /* Default VL for tasks */ | |
165 | }; | |
166 | ||
167 | static struct vl_config vl_config[ARM64_VEC_MAX]; | |
168 | ||
04ee53a5 | 169 | static inline int get_default_vl(enum vec_type type) |
b5bc00ff MB |
170 | { |
171 | return READ_ONCE(vl_config[type].__default_vl); | |
172 | } | |
1e570f51 | 173 | |
04ee53a5 MB |
174 | #ifdef CONFIG_ARM64_SVE |
175 | ||
176 | static inline int get_sve_default_vl(void) | |
1e570f51 | 177 | { |
b5bc00ff | 178 | return get_default_vl(ARM64_VEC_SVE); |
1e570f51 | 179 | } |
79ab047c | 180 | |
04ee53a5 | 181 | static inline void set_default_vl(enum vec_type type, int val) |
1e570f51 | 182 | { |
b5bc00ff | 183 | WRITE_ONCE(vl_config[type].__default_vl, val); |
1e570f51 DM |
184 | } |
185 | ||
04ee53a5 | 186 | static inline void set_sve_default_vl(int val) |
b5bc00ff MB |
187 | { |
188 | set_default_vl(ARM64_VEC_SVE, val); | |
189 | } | |
624835ab | 190 | |
fdfa976c | 191 | static void __percpu *efi_sve_state; |
7582e220 DM |
192 | |
193 | #else /* ! CONFIG_ARM64_SVE */ | |
194 | ||
195 | /* Dummy declaration for code that will be optimised out: */ | |
fdfa976c | 196 | extern void __percpu *efi_sve_state; |
7582e220 DM |
197 | |
198 | #endif /* ! CONFIG_ARM64_SVE */ | |
199 | ||
b42990d3 MB |
200 | #ifdef CONFIG_ARM64_SME |
201 | ||
202 | static int get_sme_default_vl(void) | |
203 | { | |
204 | return get_default_vl(ARM64_VEC_SME); | |
205 | } | |
206 | ||
207 | static void set_sme_default_vl(int val) | |
208 | { | |
209 | set_default_vl(ARM64_VEC_SME, val); | |
210 | } | |
211 | ||
8bd7f91c MB |
212 | static void sme_free(struct task_struct *); |
213 | ||
214 | #else | |
215 | ||
216 | static inline void sme_free(struct task_struct *t) { } | |
217 | ||
b42990d3 MB |
218 | #endif |
219 | ||
6dcdefcd JG |
220 | DEFINE_PER_CPU(bool, fpsimd_context_busy); |
221 | EXPORT_PER_CPU_SYMBOL(fpsimd_context_busy); | |
222 | ||
b24b5205 MB |
223 | static void fpsimd_bind_task_to_cpu(void); |
224 | ||
6dcdefcd JG |
225 | static void __get_cpu_fpsimd_context(void) |
226 | { | |
227 | bool busy = __this_cpu_xchg(fpsimd_context_busy, true); | |
228 | ||
229 | WARN_ON(busy); | |
230 | } | |
231 | ||
232 | /* | |
233 | * Claim ownership of the CPU FPSIMD context for use by the calling context. | |
234 | * | |
235 | * The caller may freely manipulate the FPSIMD context metadata until | |
236 | * put_cpu_fpsimd_context() is called. | |
237 | * | |
238 | * The double-underscore version must only be called if you know the task | |
239 | * can't be preempted. | |
696207d4 SAS |
240 | * |
241 | * On RT kernels local_bh_disable() is not sufficient because it only | |
242 | * serializes soft interrupt related sections via a local lock, but stays | |
243 | * preemptible. Disabling preemption is the right choice here as bottom | |
244 | * half processing is always in thread context on RT kernels so it | |
245 | * implicitly prevents bottom half processing as well. | |
6dcdefcd JG |
246 | */ |
247 | static void get_cpu_fpsimd_context(void) | |
248 | { | |
696207d4 SAS |
249 | if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
250 | local_bh_disable(); | |
251 | else | |
252 | preempt_disable(); | |
6dcdefcd JG |
253 | __get_cpu_fpsimd_context(); |
254 | } | |
255 | ||
256 | static void __put_cpu_fpsimd_context(void) | |
257 | { | |
258 | bool busy = __this_cpu_xchg(fpsimd_context_busy, false); | |
259 | ||
260 | WARN_ON(!busy); /* No matching get_cpu_fpsimd_context()? */ | |
261 | } | |
262 | ||
263 | /* | |
264 | * Release the CPU FPSIMD context. | |
265 | * | |
266 | * Must be called from a context in which get_cpu_fpsimd_context() was | |
267 | * previously called, with no call to put_cpu_fpsimd_context() in the | |
268 | * meantime. | |
269 | */ | |
270 | static void put_cpu_fpsimd_context(void) | |
271 | { | |
272 | __put_cpu_fpsimd_context(); | |
696207d4 SAS |
273 | if (!IS_ENABLED(CONFIG_PREEMPT_RT)) |
274 | local_bh_enable(); | |
275 | else | |
276 | preempt_enable(); | |
6dcdefcd JG |
277 | } |
278 | ||
279 | static bool have_cpu_fpsimd_context(void) | |
280 | { | |
281 | return !preemptible() && __this_cpu_read(fpsimd_context_busy); | |
282 | } | |
283 | ||
5838a155 | 284 | unsigned int task_get_vl(const struct task_struct *task, enum vec_type type) |
0423eedc | 285 | { |
5838a155 | 286 | return task->thread.vl[type]; |
0423eedc MB |
287 | } |
288 | ||
5838a155 MB |
289 | void task_set_vl(struct task_struct *task, enum vec_type type, |
290 | unsigned long vl) | |
0423eedc | 291 | { |
5838a155 | 292 | task->thread.vl[type] = vl; |
0423eedc MB |
293 | } |
294 | ||
5838a155 MB |
295 | unsigned int task_get_vl_onexec(const struct task_struct *task, |
296 | enum vec_type type) | |
0423eedc | 297 | { |
5838a155 | 298 | return task->thread.vl_onexec[type]; |
0423eedc MB |
299 | } |
300 | ||
5838a155 MB |
301 | void task_set_vl_onexec(struct task_struct *task, enum vec_type type, |
302 | unsigned long vl) | |
0423eedc | 303 | { |
5838a155 | 304 | task->thread.vl_onexec[type] = vl; |
0423eedc MB |
305 | } |
306 | ||
af7167d6 MB |
307 | /* |
308 | * TIF_SME controls whether a task can use SME without trapping while | |
309 | * in userspace, when TIF_SME is set then we must have storage | |
310 | * allocated in sve_state and za_state to store the contents of both ZA |
311 | * and the SVE registers for both streaming and non-streaming modes. | |
312 | * | |
313 | * If both SVCR.ZA and SVCR.SM are disabled then at any point we | |
314 | * may disable TIF_SME and reenable traps. | |
315 | */ | |
316 | ||
317 | ||
bc0ee476 DM |
318 | /* |
319 | * TIF_SVE controls whether a task can use SVE without trapping while | |
af7167d6 MB |
320 | * in userspace, and also (together with TIF_SME) the way a task's |
321 | * FPSIMD/SVE state is stored in thread_struct. | |
bc0ee476 DM |
322 | * |
323 | * The kernel uses this flag to track whether a user task is actively | |
324 | * using SVE, and therefore whether full SVE register state needs to | |
325 | * be tracked. If not, the cheaper FPSIMD context handling code can | |
326 | * be used instead of the more costly SVE equivalents. | |
327 | * | |
af7167d6 | 328 | * * TIF_SVE or SVCR.SM set: |
bc0ee476 DM |
329 | * |
330 | * The task can execute SVE instructions while in userspace without | |
331 | * trapping to the kernel. | |
332 | * | |
333 | * When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the | |
bb314511 | 334 | * corresponding Zn), P0-P15 and FFR are encoded in |
bc0ee476 | 335 | * task->thread.sve_state, formatted appropriately for vector |
af7167d6 MB |
336 | * length task->thread.sve_vl or, if SVCR.SM is set, |
337 | * task->thread.sme_vl. | |
bc0ee476 DM |
338 | * |
339 | * task->thread.sve_state must point to a valid buffer at least | |
340 | * sve_state_size(task) bytes in size. | |
341 | * | |
342 | * During any syscall, the kernel may optionally clear TIF_SVE and | |
343 | * discard the vector state except for the FPSIMD subset. | |
344 | * | |
345 | * * TIF_SVE clear: | |
346 | * | |
347 | * An attempt by the user task to execute an SVE instruction causes | |
348 | * do_sve_acc() to be called, which does some preparation and then | |
349 | * sets TIF_SVE. | |
350 | * | |
351 | * When stored, FPSIMD registers V0-V31 are encoded in | |
65896545 | 352 | * task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are |
bc0ee476 DM |
353 | * logically zero but not stored anywhere; P0-P15 and FFR are not |
354 | * stored and have unspecified values from userspace's point of | |
355 | * view. For hygiene purposes, the kernel zeroes them on next use, | |
356 | * but userspace is discouraged from relying on this. | |
357 | * | |
358 | * task->thread.sve_state does not need to be non-NULL, valid or any | |
359 | * particular size: it must not be dereferenced. | |
360 | * | |
65896545 DM |
361 | * * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state |
362 | * irrespective of whether TIF_SVE is clear or set, since these are | |
363 | * not vector length dependent. | |
bc0ee476 DM |
364 | */ |
365 | ||
366 | /* | |
367 | * Update current's FPSIMD/SVE registers from thread_struct. | |
368 | * | |
369 | * This function should be called only when the FPSIMD/SVE state in | |
370 | * thread_struct is known to be up to date, when preparing to enter | |
371 | * userspace. | |
bc0ee476 DM |
372 | */ |
373 | static void task_fpsimd_load(void) | |
374 | { | |
af7167d6 MB |
375 | bool restore_sve_regs = false; |
376 | bool restore_ffr; | |
377 | ||
52f73c38 | 378 | WARN_ON(!system_supports_fpsimd()); |
6dcdefcd | 379 | WARN_ON(!have_cpu_fpsimd_context()); |
bc0ee476 | 380 | |
af7167d6 | 381 | /* Check if we should restore SVE first */ |
ddc806b5 MB |
382 | if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) { |
383 | sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1); | |
af7167d6 MB |
384 | restore_sve_regs = true; |
385 | restore_ffr = true; | |
386 | } | |
387 | ||
388 | /* Restore SME, override SVE register configuration if needed */ | |
389 | if (system_supports_sme()) { | |
390 | unsigned long sme_vl = task_get_sme_vl(current); | |
391 | ||
0033cd93 | 392 | /* Ensure VL is set up for restoring data */ |
af7167d6 MB |
393 | if (test_thread_flag(TIF_SME)) |
394 | sme_set_vq(sve_vq_from_vl(sme_vl) - 1); | |
395 | ||
ec0067a6 | 396 | write_sysreg_s(current->thread.svcr, SYS_SVCR); |
af7167d6 | 397 | |
0033cd93 MB |
398 | if (thread_za_enabled(¤t->thread)) |
399 | za_load_state(current->thread.za_state); | |
400 | ||
af7167d6 MB |
401 | if (thread_sm_enabled(¤t->thread)) { |
402 | restore_sve_regs = true; | |
403 | restore_ffr = system_supports_fa64(); | |
404 | } | |
405 | } | |
406 | ||
407 | if (restore_sve_regs) | |
2cf97d46 | 408 | sve_load_state(sve_pffr(¤t->thread), |
af7167d6 MB |
409 | ¤t->thread.uw.fpsimd_state.fpsr, |
410 | restore_ffr); | |
411 | else | |
65896545 | 412 | fpsimd_load_state(¤t->thread.uw.fpsimd_state); |
bc0ee476 DM |
413 | } |
414 | ||
415 | /* | |
d1797615 | 416 | * Ensure FPSIMD/SVE storage in memory for the loaded context is up to |
432110cd MB |
417 | * date with respect to the CPU registers. Note carefully that the |
418 | * current context is the context last bound to the CPU stored in | |
419 | * last, if KVM is involved this may be the guest VM context rather | |
420 | * than the host thread for the VM pointed to by current. This means | |
421 | * that we must always reference the state storage via last rather | |
422 | * than via current, other than the TIF_ flags which KVM will | |
423 | * carefully maintain for us. | |
bc0ee476 | 424 | */ |
54b8c7cb | 425 | static void fpsimd_save(void) |
bc0ee476 | 426 | { |
04950674 DM |
427 | struct fpsimd_last_state_struct const *last = |
428 | this_cpu_ptr(&fpsimd_last_state); | |
e6b673b7 | 429 | /* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */ |
af7167d6 MB |
430 | bool save_sve_regs = false; |
431 | bool save_ffr; | |
432 | unsigned int vl; | |
d1797615 | 433 | |
52f73c38 | 434 | WARN_ON(!system_supports_fpsimd()); |
6dcdefcd | 435 | WARN_ON(!have_cpu_fpsimd_context()); |
bc0ee476 | 436 | |
2d481bd3 MB |
437 | if (test_thread_flag(TIF_FOREIGN_FPSTATE)) |
438 | return; | |
439 | ||
af7167d6 MB |
440 | if (test_thread_flag(TIF_SVE)) { |
441 | save_sve_regs = true; | |
442 | save_ffr = true; | |
443 | vl = last->sve_vl; | |
444 | } | |
445 | ||
446 | if (system_supports_sme()) { | |
b40c559b | 447 | u64 *svcr = last->svcr; |
ec0067a6 | 448 | *svcr = read_sysreg_s(SYS_SVCR); |
af7167d6 | 449 | |
0033cd93 | 451 | |
ec0067a6 | 452 | if (*svcr & SVCR_ZA_MASK) |
0033cd93 | 453 | za_save_state(last->za_state); |
af7167d6 MB |
454 | |
455 | /* If we are in streaming mode override regular SVE. */ | |
ec0067a6 | 456 | if (*svcr & SVCR_SM_MASK) { |
af7167d6 MB |
457 | save_sve_regs = true; |
458 | save_ffr = system_supports_fa64(); | |
459 | vl = last->sme_vl; | |
460 | } | |
b40c559b MB |
461 | } |
462 | ||
af7167d6 MB |
463 | if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) { |
464 | /* Get the configured VL from RDVL, will account for SM */ | |
465 | if (WARN_ON(sve_get_vl() != vl)) { | |
2d481bd3 MB |
466 | /* |
467 | * Can't save the user regs, so current would | |
468 | * re-enter user with corrupt state. | |
469 | * There's no way to recover, so kill it: | |
470 | */ | |
471 | force_signal_inject(SIGKILL, SI_KERNEL, 0, 0); | |
472 | return; | |
473 | } | |
474 | ||
475 | sve_save_state((char *)last->sve_state + | |
af7167d6 MB |
476 | sve_ffr_offset(vl), |
477 | &last->st->fpsr, save_ffr); | |
2d481bd3 MB |
478 | } else { |
479 | fpsimd_save_state(last->st); | |
bc0ee476 DM |
480 | } |
481 | } | |
482 | ||
7582e220 DM |
483 | /* |
484 | * All vector length selection from userspace comes through here. | |
485 | * We're on a slow path, so some sanity-checks are included. | |
486 | * If things go wrong there's a bug somewhere, but try to fall back to a | |
487 | * safe choice. | |
488 | */ | |
b5bc00ff MB |
489 | static unsigned int find_supported_vector_length(enum vec_type type, |
490 | unsigned int vl) | |
7582e220 | 491 | { |
b5bc00ff | 492 | struct vl_info *info = &vl_info[type]; |
7582e220 | 493 | int bit; |
b5bc00ff | 494 | int max_vl = info->max_vl; |
7582e220 DM |
495 | |
496 | if (WARN_ON(!sve_vl_valid(vl))) | |
b5bc00ff | 497 | vl = info->min_vl; |
7582e220 DM |
498 | |
499 | if (WARN_ON(!sve_vl_valid(max_vl))) | |
b5bc00ff | 500 | max_vl = info->min_vl; |
7582e220 DM |
501 | |
502 | if (vl > max_vl) | |
503 | vl = max_vl; | |
b42990d3 MB |
504 | if (vl < info->min_vl) |
505 | vl = info->min_vl; | |
7582e220 | 506 | |
b5bc00ff | 507 | bit = find_next_bit(info->vq_map, SVE_VQ_MAX, |
ead9e430 DM |
508 | __vq_to_bit(sve_vq_from_vl(vl))); |
509 | return sve_vl_from_vq(__bit_to_vq(bit)); | |
7582e220 DM |
510 | } |
511 | ||
e575fb9e | 512 | #if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL) |
4ffa09a9 | 513 | |
97bcbee4 | 514 | static int vec_proc_do_default_vl(struct ctl_table *table, int write, |
32927393 | 515 | void *buffer, size_t *lenp, loff_t *ppos) |
4ffa09a9 | 516 | { |
97bcbee4 MB |
517 | struct vl_info *info = table->extra1; |
518 | enum vec_type type = info->type; | |
4ffa09a9 | 519 | int ret; |
97bcbee4 | 520 | int vl = get_default_vl(type); |
4ffa09a9 DM |
521 | struct ctl_table tmp_table = { |
522 | .data = &vl, | |
523 | .maxlen = sizeof(vl), | |
524 | }; | |
525 | ||
526 | ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); | |
527 | if (ret || !write) | |
528 | return ret; | |
529 | ||
530 | /* Writing -1 has the special meaning "set to max": */ | |
87c021a8 | 531 | if (vl == -1) |
b5bc00ff | 532 | vl = info->max_vl; |
4ffa09a9 DM |
533 | |
534 | if (!sve_vl_valid(vl)) | |
535 | return -EINVAL; | |
536 | ||
97bcbee4 | 537 | set_default_vl(type, find_supported_vector_length(type, vl)); |
4ffa09a9 DM |
538 | return 0; |
539 | } | |
540 | ||
541 | static struct ctl_table sve_default_vl_table[] = { | |
542 | { | |
543 | .procname = "sve_default_vector_length", | |
544 | .mode = 0644, | |
97bcbee4 MB |
545 | .proc_handler = vec_proc_do_default_vl, |
546 | .extra1 = &vl_info[ARM64_VEC_SVE], | |
4ffa09a9 DM |
547 | }, |
548 | { } | |
549 | }; | |
550 | ||
551 | static int __init sve_sysctl_init(void) | |
552 | { | |
553 | if (system_supports_sve()) | |
554 | if (!register_sysctl("abi", sve_default_vl_table)) | |
555 | return -EINVAL; | |
556 | ||
557 | return 0; | |
558 | } | |
559 | ||
e575fb9e | 560 | #else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ |
4ffa09a9 | 561 | static int __init sve_sysctl_init(void) { return 0; } |
e575fb9e | 562 | #endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */ |
4ffa09a9 | 563 | |
12f1bacf MB |
564 | #if defined(CONFIG_ARM64_SME) && defined(CONFIG_SYSCTL) |
565 | static struct ctl_table sme_default_vl_table[] = { | |
566 | { | |
567 | .procname = "sme_default_vector_length", | |
568 | .mode = 0644, | |
569 | .proc_handler = vec_proc_do_default_vl, | |
570 | .extra1 = &vl_info[ARM64_VEC_SME], | |
571 | }, | |
572 | { } | |
573 | }; | |
574 | ||
575 | static int __init sme_sysctl_init(void) | |
576 | { | |
577 | if (system_supports_sme()) | |
578 | if (!register_sysctl("abi", sme_default_vl_table)) | |
579 | return -EINVAL; | |
580 | ||
581 | return 0; | |
582 | } | |
583 | ||
584 | #else /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */ | |
585 | static int __init sme_sysctl_init(void) { return 0; } | |
586 | #endif /* ! (CONFIG_ARM64_SME && CONFIG_SYSCTL) */ | |
587 | ||
bc0ee476 DM |
588 | #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \ |
589 | (SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET)) | |
590 | ||
41040cf7 DM |
591 | #ifdef CONFIG_CPU_BIG_ENDIAN |
592 | static __uint128_t arm64_cpu_to_le128(__uint128_t x) | |
593 | { | |
594 | u64 a = swab64(x); | |
595 | u64 b = swab64(x >> 64); | |
596 | ||
597 | return ((__uint128_t)a << 64) | b; | |
598 | } | |
599 | #else | |
600 | static __uint128_t arm64_cpu_to_le128(__uint128_t x) | |
601 | { | |
602 | return x; | |
603 | } | |
604 | #endif | |
605 | ||
606 | #define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x) | |
607 | ||
d16af870 DM |
608 | static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst, |
609 | unsigned int vq) | |
610 | { | |
611 | unsigned int i; | |
612 | __uint128_t *p; | |
613 | ||
ed2f3e9f | 614 | for (i = 0; i < SVE_NUM_ZREGS; ++i) { |
d16af870 DM |
615 | p = (__uint128_t *)ZREG(sst, vq, i); |
616 | *p = arm64_cpu_to_le128(fst->vregs[i]); | |
617 | } | |
618 | } | |
619 | ||
bc0ee476 | 620 | /* |
65896545 | 621 | * Transfer the FPSIMD state in task->thread.uw.fpsimd_state to |
bc0ee476 DM |
622 | * task->thread.sve_state. |
623 | * | |
624 | * Task can be a non-runnable task, or current. In the latter case, | |
6dcdefcd JG |
625 | * the caller must have ownership of the cpu FPSIMD context before calling |
626 | * this function. | |
bc0ee476 DM |
627 | * task->thread.sve_state must point to at least sve_state_size(task) |
628 | * bytes of allocated kernel memory. | |
65896545 DM |
629 | * task->thread.uw.fpsimd_state must be up to date before calling this |
630 | * function. | |
bc0ee476 DM |
631 | */ |
632 | static void fpsimd_to_sve(struct task_struct *task) | |
633 | { | |
634 | unsigned int vq; | |
635 | void *sst = task->thread.sve_state; | |
65896545 | 636 | struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; |
bc0ee476 DM |
637 | |
638 | if (!system_supports_sve()) | |
639 | return; | |
640 | ||
e12310a0 | 641 | vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); |
d16af870 | 642 | __fpsimd_to_sve(sst, fst, vq); |
bc0ee476 DM |
643 | } |
644 | ||
8cd969d2 DM |
645 | /* |
646 | * Transfer the SVE state in task->thread.sve_state to | |
65896545 | 647 | * task->thread.uw.fpsimd_state. |
8cd969d2 DM |
648 | * |
649 | * Task can be a non-runnable task, or current. In the latter case, | |
6dcdefcd JG |
650 | * the caller must have ownership of the cpu FPSIMD context before calling |
651 | * this function. | |
8cd969d2 DM |
652 | * task->thread.sve_state must point to at least sve_state_size(task) |
653 | * bytes of allocated kernel memory. | |
654 | * task->thread.sve_state must be up to date before calling this function. | |
655 | */ | |
656 | static void sve_to_fpsimd(struct task_struct *task) | |
657 | { | |
e12310a0 | 658 | unsigned int vq, vl; |
8cd969d2 | 659 | void const *sst = task->thread.sve_state; |
65896545 | 660 | struct user_fpsimd_state *fst = &task->thread.uw.fpsimd_state; |
8cd969d2 | 661 | unsigned int i; |
41040cf7 | 662 | __uint128_t const *p; |
8cd969d2 DM |
663 | |
664 | if (!system_supports_sve()) | |
665 | return; | |
666 | ||
e12310a0 MB |
667 | vl = thread_get_cur_vl(&task->thread); |
668 | vq = sve_vq_from_vl(vl); | |
ed2f3e9f | 669 | for (i = 0; i < SVE_NUM_ZREGS; ++i) { |
41040cf7 DM |
670 | p = (__uint128_t const *)ZREG(sst, vq, i); |
671 | fst->vregs[i] = arm64_le128_to_cpu(*p); | |
672 | } | |
8cd969d2 DM |
673 | } |
674 | ||
bc0ee476 | 675 | #ifdef CONFIG_ARM64_SVE |
8e1f78a9 GU |
676 | /* |
677 | * Call __sve_free() directly only if you know task can't be scheduled | |
678 | * or preempted. | |
679 | */ | |
680 | static void __sve_free(struct task_struct *task) | |
681 | { | |
682 | kfree(task->thread.sve_state); | |
683 | task->thread.sve_state = NULL; | |
684 | } | |
685 | ||
686 | static void sve_free(struct task_struct *task) | |
687 | { | |
688 | WARN_ON(test_tsk_thread_flag(task, TIF_SVE)); | |
689 | ||
690 | __sve_free(task); | |
691 | } | |
bc0ee476 DM |
692 | |
693 | /* | |
694 | * Return how many bytes of memory are required to store the full SVE | |
695 | * state for task, given task's currently configured vector length. | |
696 | */ | |
8bd7f91c | 697 | size_t sve_state_size(struct task_struct const *task) |
bc0ee476 | 698 | { |
af7167d6 MB |
699 | unsigned int vl = 0; |
700 | ||
701 | if (system_supports_sve()) | |
702 | vl = task_get_sve_vl(task); | |
703 | if (system_supports_sme()) | |
704 | vl = max(vl, task_get_sme_vl(task)); | |
705 | ||
706 | return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)); | |
bc0ee476 DM |
707 | } |
708 | ||
709 | /* | |
710 | * Ensure that task->thread.sve_state is allocated and sufficiently large. | |
711 | * | |
712 | * This function should be used only in preparation for replacing | |
713 | * task->thread.sve_state with new data. The memory is always zeroed | |
714 | * here to prevent stale data from showing through: this is done in | |
715 | * the interest of testability and predictability: except in the | |
716 | * do_sve_acc() case, there is no ABI requirement to hide stale data | |
717 | * written previously by the task. |
718 | */ | |
719 | void sve_alloc(struct task_struct *task) | |
720 | { | |
721 | if (task->thread.sve_state) { | |
e35ac9d0 | 722 | memset(task->thread.sve_state, 0, sve_state_size(task)); |
bc0ee476 DM |
723 | return; |
724 | } | |
725 | ||
726 | /* This is a small allocation (maximum ~8KB) and Should Not Fail. */ | |
727 | task->thread.sve_state = | |
728 | kzalloc(sve_state_size(task), GFP_KERNEL); | |
bc0ee476 DM |
729 | } |
730 | ||
43d4da2c | 731 | |
e12310a0 MB |
732 | /* |
733 | * Force the FPSIMD state shared with SVE to be updated in the SVE state | |
734 | * even if the SVE state is the current active state. | |
735 | * | |
736 | * This should only be called by ptrace. task must be non-runnable. | |
737 | * task->thread.sve_state must point to at least sve_state_size(task) | |
738 | * bytes of allocated kernel memory. | |
739 | */ | |
740 | void fpsimd_force_sync_to_sve(struct task_struct *task) | |
741 | { | |
742 | fpsimd_to_sve(task); | |
743 | } | |
744 | ||
43d4da2c DM |
745 | /* |
746 | * Ensure that task->thread.sve_state is up to date with respect to | |
747 | * the user task, irrespective of when SVE is in use or not. | |
748 | * | |
749 | * This should only be called by ptrace. task must be non-runnable. | |
750 | * task->thread.sve_state must point to at least sve_state_size(task) | |
751 | * bytes of allocated kernel memory. | |
752 | */ | |
753 | void fpsimd_sync_to_sve(struct task_struct *task) | |
754 | { | |
e12310a0 MB |
755 | if (!test_tsk_thread_flag(task, TIF_SVE) && |
756 | !thread_sm_enabled(&task->thread)) | |
43d4da2c DM |
757 | fpsimd_to_sve(task); |
758 | } | |
759 | ||
760 | /* | |
65896545 | 761 | * Ensure that task->thread.uw.fpsimd_state is up to date with respect to |
43d4da2c DM |
762 | * the user task, irrespective of whether SVE is in use or not. |
763 | * | |
764 | * This should only be called by ptrace. task must be non-runnable. | |
765 | * task->thread.sve_state must point to at least sve_state_size(task) | |
766 | * bytes of allocated kernel memory. | |
767 | */ | |
768 | void sve_sync_to_fpsimd(struct task_struct *task) | |
769 | { | |
e12310a0 MB |
770 | if (test_tsk_thread_flag(task, TIF_SVE) || |
771 | thread_sm_enabled(&task->thread)) | |
43d4da2c DM |
772 | sve_to_fpsimd(task); |
773 | } | |
774 | ||
775 | /* | |
776 | * Ensure that task->thread.sve_state is up to date with respect to | |
65896545 | 777 | * the task->thread.uw.fpsimd_state. |
43d4da2c DM |
778 | * |
779 | * This should only be called by ptrace to merge new FPSIMD register | |
780 | * values into a task for which SVE is currently active. | |
781 | * task must be non-runnable. | |
782 | * task->thread.sve_state must point to at least sve_state_size(task) | |
783 | * bytes of allocated kernel memory. | |
65896545 | 784 | * task->thread.uw.fpsimd_state must already have been initialised with |
43d4da2c DM |
785 | * the new FPSIMD register values to be merged in. |
786 | */ | |
787 | void sve_sync_from_fpsimd_zeropad(struct task_struct *task) | |
788 | { | |
789 | unsigned int vq; | |
790 | void *sst = task->thread.sve_state; | |
65896545 | 791 | struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; |
43d4da2c DM |
792 | |
793 | if (!test_tsk_thread_flag(task, TIF_SVE)) | |
794 | return; | |
795 | ||
e12310a0 | 796 | vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); |
43d4da2c DM |
797 | |
798 | memset(sst, 0, SVE_SIG_REGS_SIZE(vq)); | |
d16af870 | 799 | __fpsimd_to_sve(sst, fst, vq); |
43d4da2c DM |
800 | } |
801 | ||
/*
 * Set the vector length of @task for vector type @type, as for the
 * PR_SVE_SET_VL / PR_SME_SET_VL prctls (the SME prctl flags share the
 * same bit values as the SVE ones).
 *
 * Returns 0 on success, -EINVAL for an unknown flag or an invalid
 * vector length.  With PR_SVE_SET_VL_ONEXEC the change is deferred
 * until the next exec; otherwise any live vector state is converted
 * back to plain FPSIMD and the backing storage is freed for
 * reallocation at the new size.
 */
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
			  unsigned long vl, unsigned long flags)
{
	if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
				     PR_SVE_SET_VL_ONEXEC))
		return -EINVAL;

	if (!sve_vl_valid(vl))
		return -EINVAL;

	/*
	 * Clamp to the maximum vector length that VL-agnostic code
	 * can work with. A flag may be assigned in the future to
	 * allow setting of larger vector lengths without confusing
	 * older software.
	 */
	if (vl > VL_ARCH_MAX)
		vl = VL_ARCH_MAX;

	vl = find_supported_vector_length(type, vl);

	if (flags & (PR_SVE_VL_INHERIT |
		     PR_SVE_SET_VL_ONEXEC))
		task_set_vl_onexec(task, type, vl);
	else
		/* Reset VL to system default on next exec: */
		task_set_vl_onexec(task, type, 0);

	/* Only actually set the VL if not deferred: */
	if (flags & PR_SVE_SET_VL_ONEXEC)
		goto out;

	if (vl == task_get_vl(task, type))
		goto out;

	/*
	 * To ensure the FPSIMD bits of the SVE vector registers are preserved,
	 * write any live register state back to task_struct, and convert to a
	 * regular FPSIMD thread.
	 */
	if (task == current) {
		get_cpu_fpsimd_context();

		fpsimd_save();
	}

	fpsimd_flush_task_state(task);
	if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
	    thread_sm_enabled(&task->thread))
		sve_to_fpsimd(task);

	/* Changing the SME VL also exits streaming mode and disables ZA: */
	if (system_supports_sme() && type == ARM64_VEC_SME) {
		task->thread.svcr &= ~(SVCR_SM_MASK |
				       SVCR_ZA_MASK);
		clear_thread_flag(TIF_SME);
	}

	if (task == current)
		put_cpu_fpsimd_context();

	/*
	 * Force reallocation of task SVE and SME state to the correct
	 * size on next use:
	 */
	sve_free(task);
	if (system_supports_sme() && type == ARM64_VEC_SME)
		sme_free(task);

	task_set_vl(task, type, vl);

out:
	update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
			       flags & PR_SVE_VL_INHERIT);

	return 0;
}
878 | ||
2d2123bc DM |
879 | /* |
880 | * Encode the current vector length and flags for return. | |
30c43e73 MB |
881 | * This is only required for prctl(): ptrace has separate fields. |
882 | * SVE and SME use the same bits for _ONEXEC and _INHERIT. | |
2d2123bc | 883 | * |
30c43e73 | 884 | * flags are as for vec_set_vector_length(). |
2d2123bc | 885 | */ |
30c43e73 | 886 | static int vec_prctl_status(enum vec_type type, unsigned long flags) |
2d2123bc DM |
887 | { |
888 | int ret; | |
889 | ||
890 | if (flags & PR_SVE_SET_VL_ONEXEC) | |
30c43e73 | 891 | ret = task_get_vl_onexec(current, type); |
2d2123bc | 892 | else |
30c43e73 | 893 | ret = task_get_vl(current, type); |
2d2123bc | 894 | |
30c43e73 | 895 | if (test_thread_flag(vec_vl_inherit_flag(type))) |
2d2123bc DM |
896 | ret |= PR_SVE_VL_INHERIT; |
897 | ||
898 | return ret; | |
899 | } | |
900 | ||
901 | /* PR_SVE_SET_VL */ | |
902 | int sve_set_current_vl(unsigned long arg) | |
903 | { | |
904 | unsigned long vl, flags; | |
905 | int ret; | |
906 | ||
907 | vl = arg & PR_SVE_VL_LEN_MASK; | |
908 | flags = arg & ~vl; | |
909 | ||
4b7a6ce7 | 910 | if (!system_supports_sve() || is_compat_task()) |
2d2123bc DM |
911 | return -EINVAL; |
912 | ||
30c43e73 | 913 | ret = vec_set_vector_length(current, ARM64_VEC_SVE, vl, flags); |
2d2123bc DM |
914 | if (ret) |
915 | return ret; | |
916 | ||
30c43e73 | 917 | return vec_prctl_status(ARM64_VEC_SVE, flags); |
2d2123bc DM |
918 | } |
919 | ||
/* PR_SVE_GET_VL */
int sve_get_current_vl(void)
{
	/* SVE prctls are only for native (non-compat) tasks with SVE: */
	if (!system_supports_sve() || is_compat_task())
		return -EINVAL;

	/* Current VL plus the inherit flag, encoded as for the prctl: */
	return vec_prctl_status(ARM64_VEC_SVE, 0);
}
928 | ||
9e4ab6c8 MB |
929 | #ifdef CONFIG_ARM64_SME |
930 | /* PR_SME_SET_VL */ | |
931 | int sme_set_current_vl(unsigned long arg) | |
932 | { | |
933 | unsigned long vl, flags; | |
934 | int ret; | |
935 | ||
936 | vl = arg & PR_SME_VL_LEN_MASK; | |
937 | flags = arg & ~vl; | |
938 | ||
939 | if (!system_supports_sme() || is_compat_task()) | |
940 | return -EINVAL; | |
941 | ||
942 | ret = vec_set_vector_length(current, ARM64_VEC_SME, vl, flags); | |
943 | if (ret) | |
944 | return ret; | |
945 | ||
946 | return vec_prctl_status(ARM64_VEC_SME, flags); | |
947 | } | |
948 | ||
/* PR_SME_GET_VL */
int sme_get_current_vl(void)
{
	/* SME prctls are only for native (non-compat) tasks with SME: */
	if (!system_supports_sme() || is_compat_task())
		return -EINVAL;

	/* Current VL plus the inherit flag, encoded as for the prctl: */
	return vec_prctl_status(ARM64_VEC_SME, 0);
}
957 | #endif /* CONFIG_ARM64_SME */ | |
958 | ||
/*
 * Probe which vector quanta (VQs) this CPU supports for the vector
 * type described by @info, setting one bit per supported VQ in @map.
 *
 * Requests each VL in turn, largest first, and reads back the VL the
 * hardware actually granted; a grant smaller than the request skips
 * over the intervening unsupported lengths.
 */
static void vec_probe_vqs(struct vl_info *info,
			  DECLARE_BITMAP(map, SVE_VQ_MAX))
{
	unsigned int vq, vl;

	bitmap_zero(map, SVE_VQ_MAX);

	for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
		write_vl(info->type, vq - 1); /* self-syncing */

		/* Read back the vector length actually granted: */
		switch (info->type) {
		case ARM64_VEC_SVE:
			vl = sve_get_vl();
			break;
		case ARM64_VEC_SME:
			vl = sme_get_vl();
			break;
		default:
			vl = 0;
			break;
		}

		/* Minimum VL identified? */
		if (sve_vq_from_vl(vl) > vq)
			break;

		vq = sve_vq_from_vl(vl); /* skip intervening lengths */
		set_bit(__vq_to_bit(vq), map);
	}
}
989 | ||
/*
 * Initialise the set of known supported VQs for the boot CPU.
 * This is called during kernel boot, before secondary CPUs are brought up.
 */
void __init vec_init_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	vec_probe_vqs(info, info->vq_map);
	/* The "seen on any CPU" map starts identical to the boot CPU's: */
	bitmap_copy(info->vq_partial_map, info->vq_map, SVE_VQ_MAX);
}
1000 | ||
1001 | /* | |
1002 | * If we haven't committed to the set of supported VQs yet, filter out | |
1003 | * those not supported by the current CPU. | |
8b08e840 | 1004 | * This function is called during the bring-up of early secondary CPUs only. |
2e0f2478 | 1005 | */ |
b5bc00ff | 1006 | void vec_update_vq_map(enum vec_type type) |
2e0f2478 | 1007 | { |
b5bc00ff | 1008 | struct vl_info *info = &vl_info[type]; |
d06b76be DM |
1009 | DECLARE_BITMAP(tmp_map, SVE_VQ_MAX); |
1010 | ||
b5bc00ff MB |
1011 | vec_probe_vqs(info, tmp_map); |
1012 | bitmap_and(info->vq_map, info->vq_map, tmp_map, SVE_VQ_MAX); | |
1013 | bitmap_or(info->vq_partial_map, info->vq_partial_map, tmp_map, | |
1014 | SVE_VQ_MAX); | |
2e0f2478 DM |
1015 | } |
1016 | ||
/*
 * Check whether the current CPU supports all VQs in the committed set.
 * This function is called during the bring-up of late secondary CPUs only.
 *
 * Returns 0 if this CPU is compatible, -EINVAL if it lacks a required
 * VQ or (when KVM is possible) supports a virtualisable VQ that the
 * committed set does not.
 */
int vec_verify_vq_map(enum vec_type type)
{
	struct vl_info *info = &vl_info[type];
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;

	vec_probe_vqs(info, tmp_map);

	/* tmp_map now holds the VQs this CPU does NOT support: */
	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	if (bitmap_intersects(tmp_map, info->vq_map, SVE_VQ_MAX)) {
		pr_warn("%s: cpu%d: Required vector length(s) missing\n",
			info->name, smp_processor_id());
		return -EINVAL;
	}

	if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available())
		return 0;

	/*
	 * For KVM, it is necessary to ensure that this CPU doesn't
	 * support any vector length that guests may have probed as
	 * unsupported.
	 */

	/* Recover the set of supported VQs: */
	bitmap_complement(tmp_map, tmp_map, SVE_VQ_MAX);
	/* Find VQs supported that are not globally supported: */
	bitmap_andnot(tmp_map, tmp_map, info->vq_map, SVE_VQ_MAX);

	/* Find the lowest such VQ, if any: */
	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		return 0; /* no mismatches */

	/*
	 * Mismatches above sve_max_virtualisable_vl are fine, since
	 * no guest is allowed to configure ZCR_EL2.LEN to exceed this:
	 */
	if (sve_vl_from_vq(__bit_to_vq(b)) <= info->max_virtualisable_vl) {
		pr_warn("%s: cpu%d: Unsupported vector length(s) present\n",
			info->name, smp_processor_id());
		return -EINVAL;
	}

	return 0;
}
1067 | ||
/*
 * Allocate the per-cpu buffer used for the EFI SVE save/restore.
 * No-op when EFI support is not configured; panics on allocation
 * failure since there is no way to recover.
 */
static void __init sve_efi_setup(void)
{
	int max_vl = 0;
	int i;

	if (!IS_ENABLED(CONFIG_EFI))
		return;

	/* Size the buffer for the largest max VL across all vector types: */
	for (i = 0; i < ARRAY_SIZE(vl_info); i++)
		max_vl = max(vl_info[i].max_vl, max_vl);

	/*
	 * alloc_percpu() warns and prints a backtrace if this goes wrong.
	 * This is evidence of a crippled system and we are returning void,
	 * so no attempt is made to handle this situation here.
	 */
	if (!sve_vl_valid(max_vl))
		goto fail;

	efi_sve_state = __alloc_percpu(
		SVE_SIG_REGS_SIZE(sve_vq_from_vl(max_vl)), SVE_VQ_BYTES);
	if (!efi_sve_state)
		goto fail;

	return;

fail:
	panic("Cannot allocate percpu memory for EFI SVE save/restore");
}
1097 | ||
/*
 * Enable SVE for EL1.
 * Intended for use by the cpufeatures code during CPU boot.
 */
void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
{
	/*
	 * Setting CPACR_EL1.ZEN_EL1EN allows EL1 SVE use; the isb()
	 * synchronises the register write before any SVE instruction.
	 */
	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_ZEN_EL1EN, CPACR_EL1);
	isb();
}
1107 | ||
/*
 * Read the pseudo-ZCR used by cpufeatures to identify the supported SVE
 * vector length.
 *
 * Use only if SVE is present.
 * This function clobbers the SVE vector length.
 */
u64 read_zcr_features(void)
{
	u64 zcr;
	unsigned int vq_max;

	/*
	 * Set the maximum possible VL, and write zeroes to all other
	 * bits to see if they stick.
	 */
	sve_kernel_enable(NULL);
	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1);

	/* Read back what the hardware actually accepted: */
	zcr = read_sysreg_s(SYS_ZCR_EL1);
	zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */
	vq_max = sve_vq_from_vl(sve_get_vl());
	zcr |= vq_max - 1; /* set LEN field to maximum effective value */

	return zcr;
}
1134 | ||
/*
 * One-time boot initialisation of the SVE vector-length bookkeeping
 * (supported VQ map, maximum/default VLs, virtualisable limit) and the
 * EFI save buffer.  Runs after CPU feature registers are sanitised.
 */
void __init sve_setup(void)
{
	struct vl_info *info = &vl_info[ARM64_VEC_SVE];
	u64 zcr;
	DECLARE_BITMAP(tmp_map, SVE_VQ_MAX);
	unsigned long b;

	if (!system_supports_sve())
		return;

	/*
	 * The SVE architecture mandates support for 128-bit vectors,
	 * so sve_vq_map must have at least SVE_VQ_MIN set.
	 * If something went wrong, at least try to patch it up:
	 */
	if (WARN_ON(!test_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map)))
		set_bit(__vq_to_bit(SVE_VQ_MIN), info->vq_map);

	zcr = read_sanitised_ftr_reg(SYS_ZCR_EL1);
	info->max_vl = sve_vl_from_vq((zcr & ZCR_ELx_LEN_MASK) + 1);

	/*
	 * Sanity-check that the max VL we determined through CPU features
	 * corresponds properly to sve_vq_map. If not, do our best:
	 */
	if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SVE,
								 info->max_vl)))
		info->max_vl = find_supported_vector_length(ARM64_VEC_SVE,
							    info->max_vl);

	/*
	 * For the default VL, pick the maximum supported value <= 64.
	 * VL == 64 is guaranteed not to grow the signal frame.
	 */
	set_sve_default_vl(find_supported_vector_length(ARM64_VEC_SVE, 64));

	/* VQs seen on some CPU but not committed as globally supported: */
	bitmap_andnot(tmp_map, info->vq_partial_map, info->vq_map,
		      SVE_VQ_MAX);

	b = find_last_bit(tmp_map, SVE_VQ_MAX);
	if (b >= SVE_VQ_MAX)
		/* No non-virtualisable VLs found */
		info->max_virtualisable_vl = SVE_VQ_MAX;
	else if (WARN_ON(b == SVE_VQ_MAX - 1))
		/* No virtualisable VLs? This is architecturally forbidden. */
		info->max_virtualisable_vl = SVE_VQ_MIN;
	else /* b + 1 < SVE_VQ_MAX */
		info->max_virtualisable_vl = sve_vl_from_vq(__bit_to_vq(b + 1));

	if (info->max_virtualisable_vl > info->max_vl)
		info->max_virtualisable_vl = info->max_vl;

	pr_info("%s: maximum available vector length %u bytes per vector\n",
		info->name, info->max_vl);
	pr_info("%s: default vector length %u bytes per vector\n",
		info->name, get_sve_default_vl());

	/* KVM decides whether to support mismatched systems. Just warn here: */
	if (sve_max_virtualisable_vl() < sve_max_vl())
		pr_warn("%s: unvirtualisable vector lengths present\n",
			info->name);

	sve_efi_setup();
}
1199 | ||
/*
 * Called from the put_task_struct() path, which cannot get here
 * unless dead_task is really dead and not schedulable.
 */
void fpsimd_release_task(struct task_struct *dead_task)
{
	/* Release the SVE and SME (ZA) register backing storage, if any: */
	__sve_free(dead_task);
	sme_free(dead_task);
}
1209 | ||
1210 | #endif /* CONFIG_ARM64_SVE */ | |
1211 | ||
5e64b862 MB |
1212 | #ifdef CONFIG_ARM64_SME |
1213 | ||
8bd7f91c MB |
1214 | /* |
1215 | * Ensure that task->thread.za_state is allocated and sufficiently large. | |
1216 | * | |
1217 | * This function should be used only in preparation for replacing | |
1218 | * task->thread.za_state with new data. The memory is always zeroed | |
1219 | * here to prevent stale data from showing through: this is done in | |
1220 | * the interest of testability and predictability, the architecture | |
1221 | * guarantees that when ZA is enabled it will be zeroed. | |
1222 | */ | |
1223 | void sme_alloc(struct task_struct *task) | |
1224 | { | |
1225 | if (task->thread.za_state) { | |
1226 | memset(task->thread.za_state, 0, za_state_size(task)); | |
1227 | return; | |
1228 | } | |
1229 | ||
1230 | /* This could potentially be up to 64K. */ | |
1231 | task->thread.za_state = | |
1232 | kzalloc(za_state_size(task), GFP_KERNEL); | |
1233 | } | |
1234 | ||
/* Release the task's ZA backing storage (kfree() is NULL-safe). */
static void sme_free(struct task_struct *task)
{
	kfree(task->thread.za_state);
	task->thread.za_state = NULL;
}
1240 | ||
/*
 * Enable SME for EL1 and EL0 access to TPIDR2.
 * Intended for use by the cpufeatures code during CPU boot.
 */
void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
{
	/* Set priority for all PEs to architecturally defined minimum */
	write_sysreg_s(read_sysreg_s(SYS_SMPRI_EL1) & ~SMPRI_EL1_PRIORITY_MASK,
		       SYS_SMPRI_EL1);

	/* Allow SME in kernel */
	write_sysreg(read_sysreg(CPACR_EL1) | CPACR_EL1_SMEN_EL1EN, CPACR_EL1);
	isb();

	/* Allow EL0 to access TPIDR2 */
	write_sysreg(read_sysreg(SCTLR_EL1) | SCTLR_ELx_ENTP2, SCTLR_EL1);
	isb();
}
1255 | ||
/*
 * This must be called after sme_kernel_enable(), we rely on the
 * feature table being sorted to ensure this.
 */
void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
{
	/* Allow use of FA64 */
	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_FA64_MASK,
		       SYS_SMCR_EL1);
}
1266 | ||
/*
 * Read the pseudo-SMCR used by cpufeatures to identify the supported
 * vector length.
 *
 * Use only if SME is present.
 * This function clobbers the SME vector length.
 */
u64 read_smcr_features(void)
{
	u64 smcr;
	unsigned int vq_max;

	sme_kernel_enable(NULL);
	/* Enter streaming mode so the streaming VL can be read back: */
	sme_smstart_sm();

	/*
	 * Set the maximum possible VL.
	 */
	write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK,
		       SYS_SMCR_EL1);

	smcr = read_sysreg_s(SYS_SMCR_EL1);
	smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */
	/* NOTE(review): sve_get_vl() here reads the streaming VL — confirm. */
	vq_max = sve_vq_from_vl(sve_get_vl());
	smcr |= vq_max - 1; /* set LEN field to maximum effective value */

	sme_smstop_sm();

	return smcr;
}
1297 | ||
/*
 * One-time boot initialisation of the SME vector-length bookkeeping:
 * minimum/maximum/default streaming VLs, derived from the probed VQ
 * map and the sanitised SMCR_EL1 feature register.
 */
void __init sme_setup(void)
{
	struct vl_info *info = &vl_info[ARM64_VEC_SME];
	u64 smcr;
	int min_bit;

	if (!system_supports_sme())
		return;

	/*
	 * SME doesn't require any particular vector length be
	 * supported but it does require at least one. We should have
	 * disabled the feature entirely while bringing up CPUs but
	 * let's double check here.
	 */
	WARN_ON(bitmap_empty(info->vq_map, SVE_VQ_MAX));

	/* Last set bit corresponds to the smallest VQ (see __vq_to_bit()): */
	min_bit = find_last_bit(info->vq_map, SVE_VQ_MAX);
	info->min_vl = sve_vl_from_vq(__bit_to_vq(min_bit));

	smcr = read_sanitised_ftr_reg(SYS_SMCR_EL1);
	info->max_vl = sve_vl_from_vq((smcr & SMCR_ELx_LEN_MASK) + 1);

	/*
	 * Sanity-check that the max VL we determined through CPU features
	 * corresponds properly to sme_vq_map. If not, do our best:
	 */
	if (WARN_ON(info->max_vl != find_supported_vector_length(ARM64_VEC_SME,
								 info->max_vl)))
		info->max_vl = find_supported_vector_length(ARM64_VEC_SME,
							    info->max_vl);

	WARN_ON(info->min_vl > info->max_vl);

	/*
	 * For the default VL, pick the maximum supported value <= 32
	 * (256 bits) if there is one since this is guaranteed not to
	 * grow the signal frame when in streaming mode, otherwise the
	 * minimum available VL will be used.
	 */
	set_sme_default_vl(find_supported_vector_length(ARM64_VEC_SME, 32));

	pr_info("SME: minimum available vector length %u bytes per vector\n",
		info->min_vl);
	pr_info("SME: maximum available vector length %u bytes per vector\n",
		info->max_vl);
	pr_info("SME: default vector length %u bytes per vector\n",
		get_sme_default_vl());
}
1347 | ||
1348 | #endif /* CONFIG_ARM64_SME */ | |
5e64b862 | 1349 | |
/*
 * Transition the current task from FPSIMD-only to full SVE state,
 * preserving the shared V-register contents.
 */
static void sve_init_regs(void)
{
	/*
	 * Convert the FPSIMD state to SVE, zeroing all the state that
	 * is not shared with FPSIMD. If (as is likely) the current
	 * state is live in the registers then do this there and
	 * update our metadata for the current task including
	 * disabling the trap, otherwise update our in-memory copy.
	 * We are guaranteed to not be in streaming mode, we can only
	 * take a SVE trap when not in streaming mode and we can't be
	 * in streaming mode when taking a SME trap.
	 */
	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
		unsigned long vq_minus_one =
			sve_vq_from_vl(task_get_sve_vl(current)) - 1;
		sve_set_vq(vq_minus_one);
		/* Zero the non-FPSIMD bits of the live SVE registers: */
		sve_flush_live(true, vq_minus_one);
		fpsimd_bind_task_to_cpu();
	} else {
		/* Registers do not hold current's state: convert in memory. */
		fpsimd_to_sve(current);
	}
}
1372 | ||
/*
 * Trapped SVE access
 *
 * Storage is allocated for the full SVE state, the current FPSIMD
 * register contents are migrated across, and the access trap is
 * disabled.
 *
 * TIF_SVE should be clear on entry: otherwise, fpsimd_restore_current_state()
 * would have disabled the SVE access trap for userspace during
 * ret_to_user, making an SVE access trap impossible in that case.
 */
void do_sve_acc(unsigned long esr, struct pt_regs *regs)
{
	/* Even if we chose not to use SVE, the hardware could still trap: */
	if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	sve_alloc(current);
	if (!current->thread.sve_state) {
		/* Allocation failed: no way to run with SVE, kill the task. */
		force_sig(SIGKILL);
		return;
	}

	get_cpu_fpsimd_context();

	if (test_and_set_thread_flag(TIF_SVE))
		WARN_ON(1); /* SVE access shouldn't have trapped */

	/*
	 * Even if the task can have used streaming mode we can only
	 * generate SVE access traps in normal SVE mode and
	 * transitioning out of streaming mode may discard any
	 * streaming mode state. Always clear the high bits to avoid
	 * any potential errors tracking what is properly initialised.
	 */
	sve_init_regs();

	put_cpu_fpsimd_context();
}
1414 | ||
/*
 * Trapped SME access
 *
 * Storage is allocated for the full SVE and SME state, the current
 * FPSIMD register contents are migrated to SVE if SVE is not already
 * active, and the access trap is disabled.
 *
 * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
 * would have disabled the SME access trap for userspace during
 * ret_to_user, making an SVE access trap impossible in that case.
 */
void do_sme_acc(unsigned long esr, struct pt_regs *regs)
{
	/* Even if we chose not to use SME, the hardware could still trap: */
	if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	/*
	 * If this is not a trap due to SME being disabled then something
	 * is being used in the wrong mode, report as SIGILL.
	 */
	if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
		force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
		return;
	}

	sve_alloc(current);
	sme_alloc(current);
	if (!current->thread.sve_state || !current->thread.za_state) {
		/* Allocation failed: no way to run with SME, kill the task. */
		force_sig(SIGKILL);
		return;
	}

	get_cpu_fpsimd_context();

	/* With TIF_SME userspace shouldn't generate any traps */
	if (test_and_set_thread_flag(TIF_SME))
		WARN_ON(1);

	if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
		unsigned long vq_minus_one =
			sve_vq_from_vl(task_get_sme_vl(current)) - 1;
		sme_set_vq(vq_minus_one);

		fpsimd_bind_task_to_cpu();
	}

	/*
	 * If SVE was not already active initialise the SVE registers,
	 * any non-shared state between the streaming and regular SVE
	 * registers is architecturally guaranteed to be zeroed when
	 * we enter streaming mode. We do not need to initialize ZA
	 * since ZA must be disabled at this point and enabling ZA is
	 * architecturally defined to zero ZA.
	 */
	if (system_supports_sve() && !test_thread_flag(TIF_SVE))
		sve_init_regs();

	put_cpu_fpsimd_context();
}
1477 | ||
/*
 * Trapped FP/ASIMD access.
 *
 * No lazy FP/SIMD save/restore is implemented, so this trap is not
 * expected to occur; warn if it does.
 */
void do_fpsimd_acc(unsigned long esr, struct pt_regs *regs)
{
	/* TODO: implement lazy context saving/restoring */
	WARN_ON(1);
}
1486 | ||
1487 | /* | |
1488 | * Raise a SIGFPE for the current process. | |
1489 | */ | |
8d56e5c5 | 1490 | void do_fpsimd_exc(unsigned long esr, struct pt_regs *regs) |
53631b54 | 1491 | { |
af4a81b9 DM |
1492 | unsigned int si_code = FPE_FLTUNK; |
1493 | ||
1494 | if (esr & ESR_ELx_FP_EXC_TFV) { | |
1495 | if (esr & FPEXC_IOF) | |
1496 | si_code = FPE_FLTINV; | |
1497 | else if (esr & FPEXC_DZF) | |
1498 | si_code = FPE_FLTDIV; | |
1499 | else if (esr & FPEXC_OFF) | |
1500 | si_code = FPE_FLTOVF; | |
1501 | else if (esr & FPEXC_UFF) | |
1502 | si_code = FPE_FLTUND; | |
1503 | else if (esr & FPEXC_IXF) | |
1504 | si_code = FPE_FLTRES; | |
1505 | } | |
53631b54 | 1506 | |
c8526809 EB |
1507 | send_sig_fault(SIGFPE, si_code, |
1508 | (void __user *)instruction_pointer(regs), | |
1509 | current); | |
53631b54 CM |
1510 | } |
1511 | ||
/*
 * Context-switch hook: save any dirty FP/SIMD state for the outgoing
 * task and mark whether @next's register state must be reloaded before
 * returning to userspace (TIF_FOREIGN_FPSTATE).
 */
void fpsimd_thread_switch(struct task_struct *next)
{
	bool wrong_task, wrong_cpu;

	if (!system_supports_fpsimd())
		return;

	/*
	 * NOTE(review): the double-underscore context helpers are used
	 * here — presumably preemption is already disabled on this
	 * path; confirm against get_cpu_fpsimd_context().
	 */
	__get_cpu_fpsimd_context();

	/* Save unsaved fpsimd state, if any: */
	fpsimd_save();

	/*
	 * Fix up TIF_FOREIGN_FPSTATE to correctly describe next's
	 * state. For kernel threads, FPSIMD registers are never loaded
	 * and wrong_task and wrong_cpu will always be true.
	 */
	wrong_task = __this_cpu_read(fpsimd_last_state.st) !=
		&next->thread.uw.fpsimd_state;
	wrong_cpu = next->thread.fpsimd_cpu != smp_processor_id();

	update_tsk_thread_flag(next, TIF_FOREIGN_FPSTATE,
			       wrong_task || wrong_cpu);

	__put_cpu_fpsimd_context();
}
1538 | ||
/*
 * Reset current's vector length of type @type for exec, honouring a
 * deferred _ONEXEC value if one was set, otherwise the system default.
 */
static void fpsimd_flush_thread_vl(enum vec_type type)
{
	int vl, supported_vl;

	/*
	 * Reset the task vector length as required. This is where we
	 * ensure that all user tasks have a valid vector length
	 * configured: no kernel task can become a user task without
	 * an exec and hence a call to this function. By the time the
	 * first call to this function is made, all early hardware
	 * probing is complete, so __sve_default_vl should be valid.
	 * If a bug causes this to go wrong, we make some noise and
	 * try to fudge thread.sve_vl to a safe value here.
	 */
	vl = task_get_vl_onexec(current, type);
	if (!vl)
		vl = get_default_vl(type);

	if (WARN_ON(!sve_vl_valid(vl)))
		vl = vl_info[type].min_vl;

	supported_vl = find_supported_vector_length(type, vl);
	if (WARN_ON(supported_vl != vl))
		vl = supported_vl;

	task_set_vl(current, type, vl);

	/*
	 * If the task is not set to inherit, ensure that the vector
	 * length will be reset by a subsequent exec:
	 */
	if (!test_thread_flag(vec_vl_inherit_flag(type)))
		task_set_vl_onexec(current, type, 0);
}
1573 | ||
/*
 * Discard all of current's FP/SIMD, SVE and SME state and reset the
 * vector lengths, as required across exec.
 */
void fpsimd_flush_thread(void)
{
	void *sve_state = NULL;
	void *za_state = NULL;

	if (!system_supports_fpsimd())
		return;

	get_cpu_fpsimd_context();

	fpsimd_flush_task_state(current);
	memset(&current->thread.uw.fpsimd_state, 0,
	       sizeof(current->thread.uw.fpsimd_state));

	if (system_supports_sve()) {
		clear_thread_flag(TIF_SVE);

		/* Defer kfree() while in atomic context */
		sve_state = current->thread.sve_state;
		current->thread.sve_state = NULL;

		fpsimd_flush_thread_vl(ARM64_VEC_SVE);
	}

	if (system_supports_sme()) {
		clear_thread_flag(TIF_SME);

		/* Defer kfree() while in atomic context */
		za_state = current->thread.za_state;
		current->thread.za_state = NULL;

		fpsimd_flush_thread_vl(ARM64_VEC_SME);
		/* Leave streaming mode and ZA disabled: */
		current->thread.svcr = 0;
	}

	put_cpu_fpsimd_context();
	/* Safe to free now that the fpsimd context has been released: */
	kfree(sve_state);
	kfree(za_state);
}
1613 | ||
/*
 * Write the userland FPSIMD state of 'current' back to memory, but only if
 * the state currently held in the registers does in fact belong to 'current'
 */
void fpsimd_preserve_current_state(void)
{
	if (system_supports_fpsimd()) {
		get_cpu_fpsimd_context();
		fpsimd_save();
		put_cpu_fpsimd_context();
	}
}
1627 | ||
8cd969d2 DM |
1628 | /* |
1629 | * Like fpsimd_preserve_current_state(), but ensure that | |
65896545 | 1630 | * current->thread.uw.fpsimd_state is updated so that it can be copied to |
8cd969d2 DM |
1631 | * the signal frame. |
1632 | */ | |
1633 | void fpsimd_signal_preserve_current_state(void) | |
1634 | { | |
1635 | fpsimd_preserve_current_state(); | |
ef9c5d09 | 1636 | if (test_thread_flag(TIF_SVE)) |
8cd969d2 DM |
1637 | sve_to_fpsimd(current); |
1638 | } | |
1639 | ||
8884b7bd DM |
/*
 * Associate current's FPSIMD context with this cpu
 * The caller must have ownership of the cpu FPSIMD context before calling
 * this function.
 */
static void fpsimd_bind_task_to_cpu(void)
{
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!system_supports_fpsimd());
	/* Record whose state is now live in this cpu's registers. */
	last->st = &current->thread.uw.fpsimd_state;
	last->sve_state = current->thread.sve_state;
	last->za_state = current->thread.za_state;
	last->sve_vl = task_get_sve_vl(current);
	last->sme_vl = task_get_sme_vl(current);
	last->svcr = &current->thread.svcr;
	/* ... and note on which cpu current's state is resident. */
	current->thread.fpsimd_cpu = smp_processor_id();

	/*
	 * Toggle SVE and SME trapping for userspace if needed, these
	 * are serialised by ret_to_user().
	 */
	if (system_supports_sme()) {
		if (test_thread_flag(TIF_SME))
			sme_user_enable();
		else
			sme_user_disable();
	}

	if (system_supports_sve()) {
		if (test_thread_flag(TIF_SVE))
			sve_user_enable();
		else
			sve_user_disable();
	}
}
1677 | ||
/*
 * Associate a caller-owned FPSIMD/SVE/SME context with this cpu, so that
 * subsequent context-switch code knows whose state is live in the registers.
 * The caller retains ownership of the backing storage; it must outlive the
 * binding.  Must be called with softirqs or interrupts disabled (see the
 * WARN_ON below).
 */
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
			      unsigned int sve_vl, void *za_state,
			      unsigned int sme_vl, u64 *svcr)
{
	struct fpsimd_last_state_struct *last =
		this_cpu_ptr(&fpsimd_last_state);

	WARN_ON(!system_supports_fpsimd());
	WARN_ON(!in_softirq() && !irqs_disabled());

	last->st = st;
	last->svcr = svcr;
	last->sve_state = sve_state;
	last->za_state = za_state;
	last->sve_vl = sve_vl;
	last->sme_vl = sme_vl;
}
1695 | ||
/*
 * Load the userland FPSIMD state of 'current' from memory, but only if the
 * FPSIMD state already held in the registers is /not/ the most recent FPSIMD
 * state of 'current'. This is called when we are preparing to return to
 * userspace to ensure that userspace sees a good register state.
 */
void fpsimd_restore_current_state(void)
{
	/*
	 * For the tasks that were created before we detected the absence of
	 * FP/SIMD, the TIF_FOREIGN_FPSTATE could be set via fpsimd_thread_switch(),
	 * e.g, init. This could be then inherited by the children processes.
	 * If we later detect that the system doesn't support FP/SIMD,
	 * we must clear the flag for all the tasks to indicate that the
	 * FPSTATE is clean (as we can't have one) to avoid looping for ever in
	 * do_notify_resume().
	 */
	if (!system_supports_fpsimd()) {
		clear_thread_flag(TIF_FOREIGN_FPSTATE);
		return;
	}

	get_cpu_fpsimd_context();

	/* Registers are stale: reload from memory and claim ownership. */
	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
		task_fpsimd_load();
		fpsimd_bind_task_to_cpu();
	}

	put_cpu_fpsimd_context();
}
1727 | ||
/*
 * Load an updated userland FPSIMD state for 'current' from memory and set the
 * flag that indicates that the FPSIMD register contents are the most recent
 * FPSIMD state of 'current'. This is used by the signal code to restore the
 * register state when returning from a signal handler in FPSIMD only cases,
 * any SVE context will be discarded.
 */
void fpsimd_update_current_state(struct user_fpsimd_state const *state)
{
	if (WARN_ON(!system_supports_fpsimd()))
		return;

	get_cpu_fpsimd_context();

	current->thread.uw.fpsimd_state = *state;
	/* Propagate the new FPSIMD bits into the task's SVE view, if any. */
	if (test_thread_flag(TIF_SVE))
		fpsimd_to_sve(current);

	task_fpsimd_load();
	fpsimd_bind_task_to_cpu();

	/* The registers now hold current's most recent state. */
	clear_thread_flag(TIF_FOREIGN_FPSTATE);

	put_cpu_fpsimd_context();
}
1753 | ||
005f78cd AB |
1754 | /* |
1755 | * Invalidate live CPU copies of task t's FPSIMD state | |
efbc2024 DM |
1756 | * |
1757 | * This function may be called with preemption enabled. The barrier() | |
1758 | * ensures that the assignment to fpsimd_cpu is visible to any | |
1759 | * preemption/softirq that could race with set_tsk_thread_flag(), so | |
1760 | * that TIF_FOREIGN_FPSTATE cannot be spuriously re-cleared. | |
1761 | * | |
1762 | * The final barrier ensures that TIF_FOREIGN_FPSTATE is seen set by any | |
1763 | * subsequent code. | |
005f78cd AB |
1764 | */ |
void fpsimd_flush_task_state(struct task_struct *t)
{
	/* NR_CPUS is never a valid cpu id: no cpu owns t's state any more. */
	t->thread.fpsimd_cpu = NR_CPUS;
	/*
	 * If we don't support fpsimd, bail out after we have
	 * reset the fpsimd_cpu for this task and clear the
	 * FPSTATE.
	 */
	if (!system_supports_fpsimd())
		return;
	barrier();
	set_tsk_thread_flag(t, TIF_FOREIGN_FPSTATE);

	barrier();
}
1780 | ||
efbc2024 DM |
1781 | /* |
1782 | * Invalidate any task's FPSIMD state that is present on this cpu. | |
6dcdefcd JG |
1783 | * The FPSIMD context should be acquired with get_cpu_fpsimd_context() |
1784 | * before calling this function. | |
efbc2024 | 1785 | */ |
static void fpsimd_flush_cpu_state(void)
{
	WARN_ON(!system_supports_fpsimd());
	/* Nobody's state is considered live in this cpu's registers now. */
	__this_cpu_write(fpsimd_last_state.st, NULL);

	/*
	 * Leaving streaming mode enabled will cause issues for any kernel
	 * NEON and leaving streaming mode or ZA enabled may increase power
	 * consumption.
	 */
	if (system_supports_sme())
		sme_smstop();

	/* Make the current task reload its state before returning to user. */
	set_thread_flag(TIF_FOREIGN_FPSTATE);
}
1801 | ||
54b8c7cb JG |
1802 | /* |
1803 | * Save the FPSIMD state to memory and invalidate cpu view. | |
6dcdefcd | 1804 | * This function must be called with preemption disabled. |
54b8c7cb JG |
1805 | */ |
void fpsimd_save_and_flush_cpu_state(void)
{
	if (!system_supports_fpsimd())
		return;
	WARN_ON(preemptible());
	/*
	 * Preemption is already disabled here, so use the raw
	 * __get/__put variants of the context accessors.
	 */
	__get_cpu_fpsimd_context();
	fpsimd_save();
	fpsimd_flush_cpu_state();
	__put_cpu_fpsimd_context();
}
4cfb3613 | 1816 | |
4cfb3613 | 1817 | #ifdef CONFIG_KERNEL_MODE_NEON |
190f1ca8 | 1818 | |
4cfb3613 AB |
1819 | /* |
1820 | * Kernel-side NEON support functions | |
1821 | */ | |
cb84d11e DM |
1822 | |
1823 | /* | |
1824 | * kernel_neon_begin(): obtain the CPU FPSIMD registers for use by the calling | |
1825 | * context | |
1826 | * | |
1827 | * Must not be called unless may_use_simd() returns true. | |
1828 | * Task context in the FPSIMD registers is saved back to memory as necessary. | |
1829 | * | |
1830 | * A matching call to kernel_neon_end() must be made before returning from the | |
1831 | * calling context. | |
1832 | * | |
1833 | * The caller may freely use the FPSIMD registers until kernel_neon_end() is | |
1834 | * called. | |
1835 | */ | |
void kernel_neon_begin(void)
{
	if (WARN_ON(!system_supports_fpsimd()))
		return;

	BUG_ON(!may_use_simd());

	/* Take ownership of the cpu FPSIMD context for the calling context. */
	get_cpu_fpsimd_context();

	/* Save unsaved fpsimd state, if any: */
	fpsimd_save();

	/* Invalidate any task state remaining in the fpsimd regs: */
	fpsimd_flush_cpu_state();
}
EXPORT_SYMBOL(kernel_neon_begin);
4cfb3613 | 1852 | |
cb84d11e DM |
1853 | /* |
1854 | * kernel_neon_end(): give the CPU FPSIMD registers back to the current task | |
1855 | * | |
1856 | * Must be called from a context in which kernel_neon_begin() was previously | |
1857 | * called, with no call to kernel_neon_end() in the meantime. | |
1858 | * | |
1859 | * The caller must not use the FPSIMD registers after this function is called, | |
1860 | * unless kernel_neon_begin() is called again in the meantime. | |
1861 | */ | |
4cfb3613 AB |
void kernel_neon_end(void)
{
	if (!system_supports_fpsimd())
		return;

	/* Release the ownership taken by kernel_neon_begin(). */
	put_cpu_fpsimd_context();
}
EXPORT_SYMBOL(kernel_neon_end);
1870 | ||
e580b8bc DM |
1871 | #ifdef CONFIG_EFI |
1872 | ||
/* Per-cpu stash for FPSIMD register state live when an EFI call is made. */
static DEFINE_PER_CPU(struct user_fpsimd_state, efi_fpsimd_state);
/* True while efi_fpsimd_state holds saved state for this cpu. */
static DEFINE_PER_CPU(bool, efi_fpsimd_state_used);
/* True while the SVE (rather than plain FPSIMD) save path was used. */
static DEFINE_PER_CPU(bool, efi_sve_state_used);
/* Streaming-mode bookkeeping across the call — see __efi_fpsimd_begin/end(). */
static DEFINE_PER_CPU(bool, efi_sm_state);
4328825d DM |
1877 | |
1878 | /* | |
1879 | * EFI runtime services support functions | |
1880 | * | |
1881 | * The ABI for EFI runtime services allows EFI to use FPSIMD during the call. | |
1882 | * This means that for EFI (and only for EFI), we have to assume that FPSIMD | |
1883 | * is always used rather than being an optional accelerator. | |
1884 | * | |
1885 | * These functions provide the necessary support for ensuring FPSIMD | |
1886 | * save/restore in the contexts from which EFI is used. | |
1887 | * | |
1888 | * Do not use them for any other purpose -- if tempted to do so, you are | |
1889 | * either doing something wrong or you need to propose some refactoring. | |
1890 | */ | |
1891 | ||
1892 | /* | |
1893 | * __efi_fpsimd_begin(): prepare FPSIMD for making an EFI runtime services call | |
1894 | */ | |
1895 | void __efi_fpsimd_begin(void) | |
1896 | { | |
1897 | if (!system_supports_fpsimd()) | |
1898 | return; | |
1899 | ||
1900 | WARN_ON(preemptible()); | |
1901 | ||
fdfa976c | 1902 | if (may_use_simd()) { |
4328825d | 1903 | kernel_neon_begin(); |
fdfa976c DM |
1904 | } else { |
1905 | /* | |
1906 | * If !efi_sve_state, SVE can't be in use yet and doesn't need | |
1907 | * preserving: | |
1908 | */ | |
1909 | if (system_supports_sve() && likely(efi_sve_state)) { | |
1910 | char *sve_state = this_cpu_ptr(efi_sve_state); | |
e0838f63 MB |
1911 | bool ffr = true; |
1912 | u64 svcr; | |
fdfa976c DM |
1913 | |
1914 | __this_cpu_write(efi_sve_state_used, true); | |
1915 | ||
e0838f63 | 1916 | if (system_supports_sme()) { |
ec0067a6 | 1917 | svcr = read_sysreg_s(SYS_SVCR); |
e0838f63 MB |
1918 | |
1919 | if (!system_supports_fa64()) | |
ec0067a6 | 1920 | ffr = svcr & SVCR_SM_MASK; |
e0838f63 MB |
1921 | |
1922 | __this_cpu_write(efi_sm_state, ffr); | |
1923 | } | |
1924 | ||
b5bc00ff | 1925 | sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()), |
9f584866 | 1926 | &this_cpu_ptr(&efi_fpsimd_state)->fpsr, |
e0838f63 MB |
1927 | ffr); |
1928 | ||
1929 | if (system_supports_sme()) | |
ec0067a6 MB |
1930 | sysreg_clear_set_s(SYS_SVCR, |
1931 | SVCR_SM_MASK, 0); | |
e0838f63 | 1932 | |
fdfa976c DM |
1933 | } else { |
1934 | fpsimd_save_state(this_cpu_ptr(&efi_fpsimd_state)); | |
1935 | } | |
1936 | ||
4328825d DM |
1937 | __this_cpu_write(efi_fpsimd_state_used, true); |
1938 | } | |
1939 | } | |
1940 | ||
1941 | /* | |
1942 | * __efi_fpsimd_end(): clean up FPSIMD after an EFI runtime services call | |
1943 | */ | |
1944 | void __efi_fpsimd_end(void) | |
1945 | { | |
1946 | if (!system_supports_fpsimd()) | |
1947 | return; | |
1948 | ||
fdfa976c | 1949 | if (!__this_cpu_xchg(efi_fpsimd_state_used, false)) { |
4328825d | 1950 | kernel_neon_end(); |
fdfa976c DM |
1951 | } else { |
1952 | if (system_supports_sve() && | |
1953 | likely(__this_cpu_read(efi_sve_state_used))) { | |
1954 | char const *sve_state = this_cpu_ptr(efi_sve_state); | |
e0838f63 MB |
1955 | bool ffr = true; |
1956 | ||
1957 | /* | |
1958 | * Restore streaming mode; EFI calls are | |
1959 | * normal function calls so should not return in | |
1960 | * streaming mode. | |
1961 | */ | |
1962 | if (system_supports_sme()) { | |
1963 | if (__this_cpu_read(efi_sm_state)) { | |
ec0067a6 | 1964 | sysreg_clear_set_s(SYS_SVCR, |
e0838f63 | 1965 | 0, |
ec0067a6 | 1966 | SVCR_SM_MASK); |
e0838f63 MB |
1967 | if (!system_supports_fa64()) |
1968 | ffr = efi_sm_state; | |
1969 | } | |
1970 | } | |
fdfa976c | 1971 | |
b5bc00ff | 1972 | sve_load_state(sve_state + sve_ffr_offset(sve_max_vl()), |
fdfa976c | 1973 | &this_cpu_ptr(&efi_fpsimd_state)->fpsr, |
e0838f63 | 1974 | ffr); |
fdfa976c DM |
1975 | |
1976 | __this_cpu_write(efi_sve_state_used, false); | |
1977 | } else { | |
1978 | fpsimd_load_state(this_cpu_ptr(&efi_fpsimd_state)); | |
1979 | } | |
1980 | } | |
4328825d DM |
1981 | } |
1982 | ||
e580b8bc DM |
1983 | #endif /* CONFIG_EFI */ |
1984 | ||
4cfb3613 AB |
1985 | #endif /* CONFIG_KERNEL_MODE_NEON */ |
1986 | ||
fb1ab1ab LP |
1987 | #ifdef CONFIG_CPU_PM |
1988 | static int fpsimd_cpu_pm_notifier(struct notifier_block *self, | |
1989 | unsigned long cmd, void *v) | |
1990 | { | |
1991 | switch (cmd) { | |
1992 | case CPU_PM_ENTER: | |
54b8c7cb | 1993 | fpsimd_save_and_flush_cpu_state(); |
fb1ab1ab LP |
1994 | break; |
1995 | case CPU_PM_EXIT: | |
fb1ab1ab LP |
1996 | break; |
1997 | case CPU_PM_ENTER_FAILED: | |
1998 | default: | |
1999 | return NOTIFY_DONE; | |
2000 | } | |
2001 | return NOTIFY_OK; | |
2002 | } | |
2003 | ||
static struct notifier_block fpsimd_cpu_pm_notifier_block = {
	.notifier_call = fpsimd_cpu_pm_notifier,
};

/* Register for CPU PM notifications so state is saved before power-down. */
static void __init fpsimd_pm_init(void)
{
	cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block);
}
2012 | ||
2013 | #else | |
2014 | static inline void fpsimd_pm_init(void) { } | |
2015 | #endif /* CONFIG_CPU_PM */ | |
2016 | ||
32365e64 | 2017 | #ifdef CONFIG_HOTPLUG_CPU |
static int fpsimd_cpu_dead(unsigned int cpu)
{
	/* A dead cpu can no longer hold anyone's live FPSIMD state. */
	per_cpu(fpsimd_last_state.st, cpu) = NULL;
	return 0;
}
2023 | ||
32365e64 JL |
/* Invalidate per-cpu FPSIMD state ownership when a cpu goes offline. */
static inline void fpsimd_hotplug_init(void)
{
	cpuhp_setup_state_nocalls(CPUHP_ARM64_FPSIMD_DEAD, "arm64/fpsimd:dead",
				  NULL, fpsimd_cpu_dead);
}
2029 | ||
2030 | #else | |
2031 | static inline void fpsimd_hotplug_init(void) { } | |
2032 | #endif | |
2033 | ||
53631b54 CM |
2034 | /* |
2035 | * FP/SIMD support code initialisation. | |
2036 | */ | |
2037 | static int __init fpsimd_init(void) | |
2038 | { | |
aaba098f | 2039 | if (cpu_have_named_feature(FP)) { |
fe80f9f2 SP |
2040 | fpsimd_pm_init(); |
2041 | fpsimd_hotplug_init(); | |
2042 | } else { | |
53631b54 | 2043 | pr_notice("Floating-point is not implemented\n"); |
53631b54 | 2044 | } |
53631b54 | 2045 | |
aaba098f | 2046 | if (!cpu_have_named_feature(ASIMD)) |
53631b54 | 2047 | pr_notice("Advanced SIMD is not implemented\n"); |
fb1ab1ab | 2048 | |
5e64b862 MB |
2049 | |
2050 | if (cpu_have_named_feature(SME) && !cpu_have_named_feature(SVE)) | |
2051 | pr_notice("SME is implemented but not SVE\n"); | |
2052 | ||
12f1bacf MB |
2053 | sve_sysctl_init(); |
2054 | sme_sysctl_init(); | |
2055 | ||
2056 | return 0; | |
53631b54 | 2057 | } |
ae2e972d | 2058 | core_initcall(fpsimd_init); |