Commit | Line | Data |
---|---|---|
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  arch/arm/include/asm/xor.h
 *
 *  Copyright (C) 2001 Russell King
 */
01956597 | 7 | #include <linux/hardirq.h> |
1da177e4 | 8 | #include <asm-generic/xor.h> |
01956597 AB |
9 | #include <asm/hwcap.h> |
10 | #include <asm/neon.h> | |
1da177e4 LT |
11 | |
/*
 * XOR the value a2 into the lvalue a1.
 *
 * Both arguments and the whole expansion are parenthesized so that the
 * macro behaves as one expression regardless of the operators that
 * surround it at the point of use.
 */
#define __XOR(a1, a2) ((a1) ^= (a2))
13 | ||
/*
 * Load two consecutive words starting at the address held in dst into
 * a1 and a2.  The ldmia has no base writeback ("!"), so dst is left
 * holding the same address.
 *
 * a1 and a2 are not macro parameters: they must be variables in scope
 * at the expansion site.
 */
#define GET_BLOCK_2(dst) \
	__asm__("ldmia %[base], {%[w1], %[w2]}" \
		: [base] "+r" (dst), [w1] "=r" (a1), [w2] "=r" (a2))
18 | ||
/*
 * Load four consecutive words starting at the address held in dst into
 * a1..a4.  The ldmia has no base writeback ("!"), so dst is left holding
 * the same address.
 *
 * a1..a4 are not macro parameters: they must be variables in scope at
 * the expansion site.
 */
#define GET_BLOCK_4(dst) \
	__asm__("ldmia %[base], {%[w1], %[w2], %[w3], %[w4]}" \
		: [base] "+r" (dst), \
		  [w1] "=r" (a1), [w2] "=r" (a2), \
		  [w3] "=r" (a3), [w4] "=r" (a4))
23 | ||
/*
 * Load two words from the address held in src into the scratch variables
 * b1/b2, advancing src past them (ldmia with base writeback), then XOR
 * them into the accumulators a1/a2.
 *
 * a1, a2, b1 and b2 are not macro parameters: they must be variables in
 * scope at the expansion site.
 *
 * The body is wrapped in do { } while (0) and carries no trailing
 * semicolon, so "XOR_BLOCK_2(p);" is exactly one statement and is safe
 * as the body of an unbraced if/else.
 */
#define XOR_BLOCK_2(src) \
	do { \
		__asm__("ldmia %0!, {%1, %2}" \
			: "=r" (src), "=r" (b1), "=r" (b2) \
			: "0" (src)); \
		__XOR(a1, b1); \
		__XOR(a2, b2); \
	} while (0)
29 | ||
/*
 * Load four words from the address held in src into the scratch
 * variables b1..b4, advancing src past them (ldmia with base writeback),
 * then XOR them into the accumulators a1..a4.
 *
 * a1..a4 and b1..b4 are not macro parameters: they must be variables in
 * scope at the expansion site.
 *
 * The body is wrapped in do { } while (0) so that "XOR_BLOCK_4(p);" is
 * exactly one statement and is safe as the body of an unbraced if/else.
 */
#define XOR_BLOCK_4(src) \
	do { \
		__asm__("ldmia %0!, {%1, %2, %3, %4}" \
			: "=r" (src), "=r" (b1), "=r" (b2), "=r" (b3), "=r" (b4) \
			: "0" (src)); \
		__XOR(a1, b1); \
		__XOR(a2, b2); \
		__XOR(a3, b3); \
		__XOR(a4, b4); \
	} while (0)
35 | ||
/*
 * Store a1 and a2 to the two consecutive words at the address held in
 * dst and advance dst past them (stmia with base writeback).
 *
 * Operand %1 is the matching-constraint input for dst, which is why the
 * template jumps from %0 to %2.  a1 and a2 are not macro parameters:
 * they must be variables in scope at the expansion site.
 *
 * The "memory" clobber tells the compiler that this statement writes
 * memory that is not listed among its operands, so it does not keep
 * values of the destination buffer cached across the store.
 */
#define PUT_BLOCK_2(dst) \
	__asm__ __volatile__("stmia %0!, {%2, %3}" \
		: "=r" (dst) \
		: "0" (dst), "r" (a1), "r" (a2) \
		: "memory")
40 | ||
/*
 * Store a1..a4 to the four consecutive words at the address held in dst
 * and advance dst past them (stmia with base writeback).
 *
 * Operand %1 is the matching-constraint input for dst, which is why the
 * template jumps from %0 to %2.  a1..a4 are not macro parameters: they
 * must be variables in scope at the expansion site.
 *
 * The "memory" clobber tells the compiler that this statement writes
 * memory that is not listed among its operands, so it does not keep
 * values of the destination buffer cached across the store.
 */
#define PUT_BLOCK_4(dst) \
	__asm__ __volatile__("stmia %0!, {%2, %3, %4, %5}" \
		: "=r" (dst) \
		: "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4) \
		: "memory")
45 | ||
/*
 * XOR the buffer at p2 into the buffer at p1, four words per iteration.
 *
 * @bytes: length of each buffer in bytes.  Only whole blocks of four
 *         unsigned longs are processed; a remainder is ignored.
 * @p1:    destination buffer, which is also the first source.
 * @p2:    second source buffer.
 *
 * a1..a4 and b1..b4 are referenced by name from the GET_BLOCK_4,
 * XOR_BLOCK_4 and PUT_BLOCK_4 macros and are pinned to the named
 * registers.
 */
static void
xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 4;
	register unsigned int a1 __asm__("r4");
	register unsigned int a2 __asm__("r5");
	register unsigned int a3 __asm__("r6");
	register unsigned int a4 __asm__("r7");
	register unsigned int b1 __asm__("r8");
	register unsigned int b2 __asm__("r9");
	register unsigned int b3 __asm__("ip");
	register unsigned int b4 __asm__("lr");

	/*
	 * The loop tests its counter only after a block has been processed,
	 * so a zero count would wrap around and run far past the buffers.
	 */
	if (!lines)
		return;

	do {
		GET_BLOCK_4(p1);	/* a1..a4 = p1[0..3], p1 not advanced */
		XOR_BLOCK_4(p2);	/* a1..a4 ^= p2[0..3], p2 advanced */
		PUT_BLOCK_4(p1);	/* p1[0..3] = a1..a4, p1 advanced */
	} while (--lines);
}
65 | ||
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, four words per
 * iteration.
 *
 * @bytes: length of each buffer in bytes.  Only whole blocks of four
 *         unsigned longs are processed; a remainder is ignored.
 * @p1:    destination buffer, which is also the first source.
 * @p2:    second source buffer.
 * @p3:    third source buffer.
 *
 * a1..a4 and b1..b4 are referenced by name from the GET_BLOCK_4,
 * XOR_BLOCK_4 and PUT_BLOCK_4 macros and are pinned to the named
 * registers.
 */
static void
xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 4;
	register unsigned int a1 __asm__("r4");
	register unsigned int a2 __asm__("r5");
	register unsigned int a3 __asm__("r6");
	register unsigned int a4 __asm__("r7");
	register unsigned int b1 __asm__("r8");
	register unsigned int b2 __asm__("r9");
	register unsigned int b3 __asm__("ip");
	register unsigned int b4 __asm__("lr");

	/*
	 * The loop tests its counter only after a block has been processed,
	 * so a zero count would wrap around and run far past the buffers.
	 */
	if (!lines)
		return;

	do {
		GET_BLOCK_4(p1);	/* a1..a4 = p1[0..3], p1 not advanced */
		XOR_BLOCK_4(p2);	/* a1..a4 ^= p2[0..3], p2 advanced */
		XOR_BLOCK_4(p3);	/* a1..a4 ^= p3[0..3], p3 advanced */
		PUT_BLOCK_4(p1);	/* p1[0..3] = a1..a4, p1 advanced */
	} while (--lines);
}
87 | ||
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, two words per
 * iteration.
 *
 * @bytes: length of each buffer in bytes.  Only whole blocks of two
 *         unsigned longs are processed; a remainder is ignored.
 * @p1:    destination buffer, which is also the first source.
 * @p2:    second source buffer.
 * @p3:    third source buffer.
 * @p4:    fourth source buffer.
 *
 * a1, a2, b1 and b2 are referenced by name from the GET_BLOCK_2,
 * XOR_BLOCK_2 and PUT_BLOCK_2 macros and are pinned to the named
 * registers.
 */
static void
xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 2;
	register unsigned int a1 __asm__("r8");
	register unsigned int a2 __asm__("r9");
	register unsigned int b1 __asm__("ip");
	register unsigned int b2 __asm__("lr");

	/*
	 * The loop tests its counter only after a block has been processed,
	 * so a zero count would wrap around and run far past the buffers.
	 */
	if (!lines)
		return;

	do {
		GET_BLOCK_2(p1);	/* a1, a2 = p1[0..1], p1 not advanced */
		XOR_BLOCK_2(p2);	/* a1, a2 ^= p2[0..1], p2 advanced */
		XOR_BLOCK_2(p3);	/* a1, a2 ^= p3[0..1], p3 advanced */
		XOR_BLOCK_2(p4);	/* a1, a2 ^= p4[0..1], p4 advanced */
		PUT_BLOCK_2(p1);	/* p1[0..1] = a1, a2, p1 advanced */
	} while (--lines);
}
106 | ||
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, two words
 * per iteration.
 *
 * @bytes: length of each buffer in bytes.  Only whole blocks of two
 *         unsigned longs are processed; a remainder is ignored.
 * @p1:    destination buffer, which is also the first source.
 * @p2:    second source buffer.
 * @p3:    third source buffer.
 * @p4:    fourth source buffer.
 * @p5:    fifth source buffer.
 *
 * a1, a2, b1 and b2 are referenced by name from the GET_BLOCK_2,
 * XOR_BLOCK_2 and PUT_BLOCK_2 macros and are pinned to the named
 * registers.
 */
static void
xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	       unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	unsigned int lines = bytes / sizeof(unsigned long) / 2;
	register unsigned int a1 __asm__("r8");
	register unsigned int a2 __asm__("r9");
	register unsigned int b1 __asm__("ip");
	register unsigned int b2 __asm__("lr");

	/*
	 * The loop tests its counter only after a block has been processed,
	 * so a zero count would wrap around and run far past the buffers.
	 */
	if (!lines)
		return;

	do {
		GET_BLOCK_2(p1);	/* a1, a2 = p1[0..1], p1 not advanced */
		XOR_BLOCK_2(p2);	/* a1, a2 ^= p2[0..1], p2 advanced */
		XOR_BLOCK_2(p3);	/* a1, a2 ^= p3[0..1], p3 advanced */
		XOR_BLOCK_2(p4);	/* a1, a2 ^= p4[0..1], p4 advanced */
		XOR_BLOCK_2(p5);	/* a1, a2 ^= p5[0..1], p5 advanced */
		PUT_BLOCK_2(p1);	/* p1[0..1] = a1, a2, p1 advanced */
	} while (--lines);
}
126 | ||
127 | static struct xor_block_template xor_block_arm4regs = { | |
128 | .name = "arm4regs", | |
129 | .do_2 = xor_arm4regs_2, | |
130 | .do_3 = xor_arm4regs_3, | |
131 | .do_4 = xor_arm4regs_4, | |
132 | .do_5 = xor_arm4regs_5, | |
133 | }; | |
134 | ||
/*
 * Drop any earlier definition of XOR_TRY_TEMPLATES and replace it with
 * one that passes, in this order, the arm4regs, 8regs and 32regs
 * templates to xor_speed() and then expands NEON_TEMPLATES.
 */
#undef XOR_TRY_TEMPLATES
#define XOR_TRY_TEMPLATES					\
	do {							\
		xor_speed(&xor_block_arm4regs);			\
		xor_speed(&xor_block_8regs);			\
		xor_speed(&xor_block_32regs);			\
		NEON_TEMPLATES;					\
	} while (0)
01956597 AB |
143 | |
144 | #ifdef CONFIG_KERNEL_MODE_NEON | |
145 | ||
/*
 * Template whose do_2..do_5 members are invoked by the xor_neon_*()
 * wrappers; its definition is not part of this header.
 */
extern const struct xor_block_template xor_block_neon_inner;
147 | ||
148 | static void | |
149 | xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) | |
150 | { | |
151 | if (in_interrupt()) { | |
152 | xor_arm4regs_2(bytes, p1, p2); | |
153 | } else { | |
154 | kernel_neon_begin(); | |
155 | xor_block_neon_inner.do_2(bytes, p1, p2); | |
156 | kernel_neon_end(); | |
157 | } | |
158 | } | |
159 | ||
160 | static void | |
161 | xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
162 | unsigned long *p3) | |
163 | { | |
164 | if (in_interrupt()) { | |
165 | xor_arm4regs_3(bytes, p1, p2, p3); | |
166 | } else { | |
167 | kernel_neon_begin(); | |
168 | xor_block_neon_inner.do_3(bytes, p1, p2, p3); | |
169 | kernel_neon_end(); | |
170 | } | |
171 | } | |
172 | ||
173 | static void | |
174 | xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
175 | unsigned long *p3, unsigned long *p4) | |
176 | { | |
177 | if (in_interrupt()) { | |
178 | xor_arm4regs_4(bytes, p1, p2, p3, p4); | |
179 | } else { | |
180 | kernel_neon_begin(); | |
181 | xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4); | |
182 | kernel_neon_end(); | |
183 | } | |
184 | } | |
185 | ||
186 | static void | |
187 | xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, | |
188 | unsigned long *p3, unsigned long *p4, unsigned long *p5) | |
189 | { | |
190 | if (in_interrupt()) { | |
191 | xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); | |
192 | } else { | |
193 | kernel_neon_begin(); | |
194 | xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5); | |
195 | kernel_neon_end(); | |
196 | } | |
197 | } | |
198 | ||
199 | static struct xor_block_template xor_block_neon = { | |
200 | .name = "neon", | |
201 | .do_2 = xor_neon_2, | |
202 | .do_3 = xor_neon_3, | |
203 | .do_4 = xor_neon_4, | |
204 | .do_5 = xor_neon_5 | |
205 | }; | |
206 | ||
/*
 * Pass the "neon" template to xor_speed(), but only when cpu_has_neon()
 * returns true.
 */
#define NEON_TEMPLATES						\
	do {							\
		if (cpu_has_neon())				\
			xor_speed(&xor_block_neon);		\
	} while (0)
209 | #else | |
210 | #define NEON_TEMPLATES | |
211 | #endif |