Commit | Line | Data |
---|---|---|
c5af58b7 GR |
1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. | |
3 | ||
4 | #include <linux/linkage.h> | |
5 | ||
/*
 * GET_FRONT_BITS rx, y
 *
 * Shift register \rx in place by the immediate \y: logical right (lsri)
 * when __cskyLE__ is defined, logical left (lsli) otherwise.  memcpy uses
 * it on the previously loaded source word when merging two words of a
 * misaligned source.
 */
.macro	GET_FRONT_BITS rx y
#ifndef	__cskyLE__
	lsli	\rx, \y
#else
	lsri	\rx, \y
#endif
.endm
13 | ||
/*
 * GET_AFTER_BITS rx, y
 *
 * Mirror image of GET_FRONT_BITS: shift register \rx in place by the
 * immediate \y, logical left (lsli) when __cskyLE__ is defined, logical
 * right (lsri) otherwise.  memcpy uses it on the newly loaded source word
 * when merging two words of a misaligned source.
 */
.macro	GET_AFTER_BITS rx y
#ifndef	__cskyLE__
	lsri	\rx, \y
#else
	lsli	\rx, \y
#endif
.endm
21 | ||
/*
 * void *memcpy(void *dest, const void *src, size_t n);
 *
 * In:    r2 = dest, r3 = src, r4 = n
 * Out:   r2 is never written, so it still holds dest at every rts.
 * Work:  r7 = write cursor, r3 = read cursor, r4 = bytes still to copy,
 *        r1/r5/r6 = scratch.  r8-r11 are spilled to the stack around the
 *        16-byte loops and reloaded before those loops are left.
 *
 * Strategy:
 *   - n < 4, or the two buffers are closer together than n bytes:
 *     plain forward byte loop (.L_copy_by_byte).
 *   - otherwise byte-copy until the write cursor is word aligned, then
 *       * read cursor aligned too: word copy, 16 bytes per iteration
 *         while possible, then 4 bytes, then the byte loop for the tail;
 *       * read cursor off by 1, 2 or 3: read aligned words and build each
 *         output word from two neighbouring input words with
 *         GET_FRONT_BITS / GET_AFTER_BITS.
 */
ENTRY(memcpy)
	mov	r7, r2			/* r7 = write cursor, r2 stays = dest */
	cmplti	r4, 4
	bt	.L_copy_by_byte		/* fewer than 4 bytes in total */
	mov	r6, r2
	andi	r6, 3			/* r6 = dest & 3 */
	cmpnei	r6, 0
	jbt	.L_dest_not_aligned
	mov	r6, r3
	andi	r6, 3			/* r6 = src & 3 */
	cmpnei	r6, 0
	jbt	.L_dest_aligned_but_src_not_aligned

	/* Both cursors are word aligned from here on. */
.L0:
	cmplti	r4, 16
	jbt	.L_aligned_and_len_less_16bytes
	subi	sp, 8
	stw	r8, (sp, 0)		/* r8 serves as an extra data register */
.L_aligned_and_len_larger_16bytes:
	ldw	r1, (r3, 0)
	ldw	r5, (r3, 4)
	ldw	r8, (r3, 8)
	stw	r1, (r7, 0)
	ldw	r1, (r3, 12)		/* r1 is reused for the fourth word */
	stw	r5, (r7, 4)
	stw	r8, (r7, 8)
	stw	r1, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L_aligned_and_len_larger_16bytes
	ldw	r8, (sp, 0)
	addi	sp, 8
	cmpnei	r4, 0
	jbf	.L_return		/* n was a multiple of 16 */

.L_aligned_and_len_less_16bytes:
	cmplti	r4, 4
	bt	.L_copy_by_byte
.L1:					/* one word per iteration */
	ldw	r1, (r3, 0)
	stw	r1, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	jbf	.L1
	br	.L_copy_by_byte

.L_return:
	rts

	/*
	 * Forward byte loop.  Copies r4 bytes from (r3) to (r7); used for
	 * short copies, for tails, and as the fallback for close buffers.
	 */
.L_copy_by_byte:
	cmpnei	r4, 0
	jbf	.L_return
.L4:
	ldb	r1, (r3, 0)
	stb	r1, (r7, 0)
	addi	r3, 1
	addi	r7, 1
	decne	r4
	jbt	.L4
	rts

	/*
	 * If dest is not aligned, just copying some bytes makes the dest align.
	 * After that, we judge whether the src is aligned.
	 *
	 * On entry r6 = dest & 3 (non-zero).
	 */
.L_dest_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5			/* r5 = distance between the cursors */
	cmplt	r5, r4
	bt	.L_copy_by_byte		/* closer than n bytes: byte copy only */
	mov	r5, r7
	sub	r5, r3			/* r5 = write cursor - read cursor */
	cmphs	r5, r4
	bf	.L_copy_by_byte		/* (unsigned) r5 < n: byte copy only */
	mov	r5, r6			/* count r5 up from dest & 3 to 4 */
.L5:
	ldb	r1, (r3, 0)		/* makes the dest align. */
	stb	r1, (r7, 0)
	addi	r5, 1
	subi	r4, 1
	addi	r3, 1
	addi	r7, 1
	cmpnei	r5, 4
	jbt	.L5
	cmplti	r4, 4
	jbt	.L_copy_by_byte
	mov	r6, r3			/* judge whether the src is aligned. */
	andi	r6, 3
	cmpnei	r6, 0
	jbf	.L0

	/*
	 * Judge the number of misaligned, 1, 2, 3?
	 *
	 * On entry r6 = read cursor & 3 (non-zero) and the write cursor is
	 * word aligned.  After the distance check the read cursor is rounded
	 * down to a word boundary, that word is loaded into r1, and r3 moves
	 * on to the next word.  From then on r3 is (4 - r6) bytes ahead of the
	 * real read position, which is why every exit to .L_copy_by_byte
	 * below first subtracts 3, 2 or 1 from r3.
	 */
.L_dest_aligned_but_src_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5			/* r5 = distance between the cursors */
	cmplt	r5, r4
	bt	.L_copy_by_byte		/* r3 still exact here */
	bclri	r3, 0
	bclri	r3, 1			/* r3 &= ~3 */
	ldw	r1, (r3, 0)		/* r1 = word containing the first bytes */
	addi	r3, 4
	cmpnei	r6, 2
	bf	.L_dest_aligned_but_src_not_aligned_2bytes
	cmpnei	r6, 3
	bf	.L_dest_aligned_but_src_not_aligned_3bytes

	/* Read cursor & 3 == 1: r3 is 3 bytes ahead of the real position. */
.L_dest_aligned_but_src_not_aligned_1byte:
	/*
	 * Second distance check, now against the word-aligned r3.  Because
	 * r3 has already been moved, the fallback must leave through
	 * .L_1byte_tail (which undoes the 3-byte lead) and not jump straight
	 * to .L_copy_by_byte, or the byte loop would read from src + 3.
	 */
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	bf	.L_1byte_tail
	cmplti	r4, 16
	bf	.L11
.L10:					/* If the len is less than 16 bytes */
	GET_FRONT_BITS r1 8		/* leftover part of the previous word */
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the new word for the next round */
	GET_AFTER_BITS r6 24		/* part of the new word that completes it */
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L10
.L_1byte_tail:
	subi	r3, 3			/* back to the real read position */
	br	.L_copy_by_byte
.L11:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L12:					/* 16 bytes per iteration */
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	/*
	 * r1 and r10 alternate as the "previous word": each loaded word is
	 * saved unshifted before it is shifted and merged.  The shift
	 * directions depend on endianness and live in the two macros.
	 */
	GET_FRONT_BITS r1 8
	mov	r10, r5
	GET_AFTER_BITS r5 24
	or	r5, r1

	GET_FRONT_BITS r10 8
	mov	r1, r11
	GET_AFTER_BITS r11 24
	or	r11, r10

	GET_FRONT_BITS r1 8
	mov	r10, r8
	GET_AFTER_BITS r8 24
	or	r8, r1

	GET_FRONT_BITS r10 8
	mov	r1, r9			/* r1 = last word, carried to next round */
	GET_AFTER_BITS r9 24
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L12
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L10			/* finish whole words 4 bytes at a time */
	subi	r3, 3			/* back to the real read position */
	br	.L_copy_by_byte

	/* Read cursor & 3 == 2: r3 is 2 bytes ahead of the real position. */
.L_dest_aligned_but_src_not_aligned_2bytes:
	cmplti	r4, 16
	bf	.L21
.L20:					/* one word per iteration */
	GET_FRONT_BITS r1 16
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the new word for the next round */
	GET_AFTER_BITS r6 16
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2			/* back to the real read position */
	br	.L_copy_by_byte

.L21:					/* n >= 16 */
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)

.L22:					/* 16 bytes per iteration */
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 16
	mov	r10, r5
	GET_AFTER_BITS r5 16
	or	r5, r1

	GET_FRONT_BITS r10 16
	mov	r1, r11
	GET_AFTER_BITS r11 16
	or	r11, r10

	GET_FRONT_BITS r1 16
	mov	r10, r8
	GET_AFTER_BITS r8 16
	or	r8, r1

	GET_FRONT_BITS r10 16
	mov	r1, r9			/* r1 = last word, carried to next round */
	GET_AFTER_BITS r9 16
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L22
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L20			/* finish whole words 4 bytes at a time */
	subi	r3, 2			/* back to the real read position */
	br	.L_copy_by_byte


	/* Read cursor & 3 == 3: r3 is 1 byte ahead of the real position. */
.L_dest_aligned_but_src_not_aligned_3bytes:
	cmplti	r4, 16
	bf	.L31
.L30:					/* one word per iteration */
	GET_FRONT_BITS r1 24
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the new word for the next round */
	GET_AFTER_BITS r6 8
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1			/* back to the real read position */
	br	.L_copy_by_byte
.L31:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L32:					/* 16 bytes per iteration */
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 24
	mov	r10, r5
	GET_AFTER_BITS r5 8
	or	r5, r1

	GET_FRONT_BITS r10 24
	mov	r1, r11
	GET_AFTER_BITS r11 8
	or	r11, r10

	GET_FRONT_BITS r1 24
	mov	r10, r8
	GET_AFTER_BITS r8 8
	or	r8, r1

	GET_FRONT_BITS r10 24
	mov	r1, r9			/* r1 = last word, carried to next round */
	GET_AFTER_BITS r9 8
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L32
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L30			/* finish whole words 4 bytes at a time */
	subi	r3, 1			/* back to the real read position */
	br	.L_copy_by_byte