Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
[linux-2.6-block.git] / arch / csky / abiv1 / memcpy.S
CommitLineData
c5af58b7
GR
1/* SPDX-License-Identifier: GPL-2.0 */
2// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
3
4#include <linux/linkage.h>
5
/*
 * GET_FRONT_BITS rx, y
 *
 * Shift register \rx by the immediate \y (a bit count) towards the
 * lowest-addressed byte of the word: a logical right shift on
 * little-endian builds (__cskyLE__ defined), a logical left shift
 * otherwise.  memcpy in this file applies it to the previously loaded
 * source word so that the bytes still to be stored end up at the start
 * of the output word.
 */
.macro	GET_FRONT_BITS rx, y
#ifndef	__cskyLE__
	lsli	\rx, \y
#else
	lsri	\rx, \y
#endif
.endm
13
/*
 * GET_AFTER_BITS rx, y
 *
 * Counterpart of GET_FRONT_BITS: shift register \rx by the immediate \y
 * (a bit count) towards the highest-addressed byte of the word: a
 * logical left shift on little-endian builds (__cskyLE__ defined), a
 * logical right shift otherwise.  memcpy in this file applies it to the
 * newly loaded source word so that its leading bytes fill the end of
 * the output word.
 */
.macro	GET_AFTER_BITS rx, y
#ifndef	__cskyLE__
	lsri	\rx, \y
#else
	lsli	\rx, \y
#endif
.endm
21
/*
 * void *memcpy(void *dest, const void *src, size_t n);
 *
 * Register roles, as used by the code below:
 *   r2       dest on entry; never written, so it still holds dest at rts
 *   r3       source cursor
 *   r4       number of bytes still to copy
 *   r7       destination cursor (starts as a copy of r2)
 *   r1/r5/r6 scratch
 *   r8-r11   extra data registers for the 16-byte loops; spilled to the
 *            stack before use and reloaded before leaving the loop
 *
 * Outline:
 *   - n < 4                      -> byte loop
 *   - dest not word aligned      -> copy bytes until it is
 *   - dest and src word aligned  -> 16-byte blocks, then words, then bytes
 *   - only dest word aligned     -> read aligned source words and merge
 *                                   neighbouring words with shifts
 *                                   (src offset 1, 2 or 3 handled apart)
 * Whenever a distance check between the two cursors fails, the routine
 * falls back to the plain byte loop.
 */
ENTRY(memcpy)
	mov	r7, r2			/* r7 = dest cursor, r2 stays intact */
	cmplti	r4, 4
	bt	.L_copy_by_byte
	mov	r6, r2
	andi	r6, 3			/* r6 = dest & 3 */
	cmpnei	r6, 0
	jbt	.L_dest_not_aligned
	mov	r6, r3
	andi	r6, 3			/* r6 = src & 3 */
	cmpnei	r6, 0
	jbt	.L_dest_aligned_but_src_not_aligned

/* Both cursors are word aligned from here on. */
.L0:
	cmplti	r4, 16
	jbt	.L_aligned_and_len_less_16bytes
	subi	sp, 8
	stw	r8, (sp, 0)		/* r8 is used as a third data register */
.L_aligned_and_len_larger_16bytes:
	ldw	r1, (r3, 0)
	ldw	r5, (r3, 4)
	ldw	r8, (r3, 8)
	stw	r1, (r7, 0)
	ldw	r1, (r3, 12)
	stw	r5, (r7, 4)
	stw	r8, (r7, 8)
	stw	r1, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L_aligned_and_len_larger_16bytes
	ldw	r8, (sp, 0)
	addi	sp, 8
	cmpnei	r4, 0
	jbf	.L_return		/* nothing left */

.L_aligned_and_len_less_16bytes:
	cmplti	r4, 4
	bt	.L_copy_by_byte
.L1:					/* one word per iteration */
	ldw	r1, (r3, 0)
	stw	r1, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	jbf	.L1
	br	.L_copy_by_byte

.L_return:
	rts

/*
 * Copy the remaining r4 bytes one at a time from r3 to r7.  This is the
 * tail of every path above and below, and also the fallback taken when
 * one of the distance checks fails.
 */
.L_copy_by_byte:
	cmpnei	r4, 0
	jbf	.L_return
.L4:
	ldb	r1, (r3, 0)
	stb	r1, (r7, 0)
	addi	r3, 1
	addi	r7, 1
	decne	r4			/* loop until the count reaches zero */
	jbt	.L4
	rts

/*
 * If dest is not aligned, just copying some bytes makes the dest align.
 * After that, we judge whether the src is aligned.
 */
.L_dest_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5			/* r5 = distance between the cursors */
	cmplt	r5, r4
	bt	.L_copy_by_byte		/* closer than n bytes: byte loop */
	mov	r5, r7
	sub	r5, r3			/* r5 = dest - src */
	cmphs	r5, r4
	bf	.L_copy_by_byte
	mov	r5, r6			/* r5 = dest & 3, counts up to 4 */
.L5:
	ldb	r1, (r3, 0)		/* makes the dest align. */
	stb	r1, (r7, 0)
	addi	r5, 1
	subi	r4, 1
	addi	r3, 1
	addi	r7, 1
	cmpnei	r5, 4
	jbt	.L5
	cmplti	r4, 4
	jbt	.L_copy_by_byte
	mov	r6, r3			/* judge whether the src is aligned. */
	andi	r6, 3
	cmpnei	r6, 0
	jbf	.L0
	/* src still misaligned: fall through with r6 = src & 3 */

/*
 * Judge the number of misaligned, 1, 2, 3?
 *
 * r3 is rounded down to a word boundary, the first aligned word is
 * preloaded into r1 and r3 is advanced to the next word.  From here
 * until the tail of each path, r3 is therefore (4 - r6) bytes ahead of
 * the real source position, and r1 always holds the most recently
 * loaded, not yet fully consumed source word.
 */
.L_dest_aligned_but_src_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5			/* r5 = distance between the cursors */
	cmplt	r5, r4
	bt	.L_copy_by_byte		/* r3 is still the real src here */
	bclri	r3, 0
	bclri	r3, 1			/* r3 &= ~3 */
	ldw	r1, (r3, 0)
	addi	r3, 4
	cmpnei	r6, 2
	bf	.L_dest_aligned_but_src_not_aligned_2bytes
	cmpnei	r6, 3
	bf	.L_dest_aligned_but_src_not_aligned_3bytes

.L_dest_aligned_but_src_not_aligned_1byte:
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	/*
	 * r3 was rounded down and advanced above, so it is 3 bytes past
	 * the real source position.  The byte-copy fallback must go through
	 * the tail that undoes this; branching straight to .L_copy_by_byte
	 * would copy from src + 3.
	 */
	bf	.L_src_1byte_tail
	cmplti	r4, 16
	bf	.L11
.L10:					/* If the len is less than 16 bytes */
	GET_FRONT_BITS r1 8		/* last 3 bytes of the previous word */
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the new word for the next round */
	GET_AFTER_BITS r6 24		/* first byte of the new word */
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L10
.L_src_1byte_tail:
	subi	r3, 3			/* back to the real source position */
	br	.L_copy_by_byte
.L11:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L12:					/* 16 bytes per iteration */
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 8		/* shift direction depends on endianness */
	mov	r10, r5
	GET_AFTER_BITS r5 24
	or	r5, r1

	GET_FRONT_BITS r10 8
	mov	r1, r11
	GET_AFTER_BITS r11 24
	or	r11, r10

	GET_FRONT_BITS r1 8
	mov	r10, r8
	GET_AFTER_BITS r8 24
	or	r8, r1

	GET_FRONT_BITS r10 8
	mov	r1, r9			/* r1 = last loaded word, unshifted */
	GET_AFTER_BITS r9 24
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L12
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L10
	subi	r3, 3			/* back to the real source position */
	br	.L_copy_by_byte

.L_dest_aligned_but_src_not_aligned_2bytes:
	cmplti	r4, 16
	bf	.L21
.L20:					/* less than 16 bytes left */
	GET_FRONT_BITS r1 16		/* last 2 bytes of the previous word */
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the new word for the next round */
	GET_AFTER_BITS r6 16		/* first 2 bytes of the new word */
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2			/* back to the real source position */
	br	.L_copy_by_byte

.L21:					/* n >= 16 */
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)

.L22:					/* 16 bytes per iteration */
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 16
	mov	r10, r5
	GET_AFTER_BITS r5 16
	or	r5, r1

	GET_FRONT_BITS r10 16
	mov	r1, r11
	GET_AFTER_BITS r11 16
	or	r11, r10

	GET_FRONT_BITS r1 16
	mov	r10, r8
	GET_AFTER_BITS r8 16
	or	r8, r1

	GET_FRONT_BITS r10 16
	mov	r1, r9			/* r1 = last loaded word, unshifted */
	GET_AFTER_BITS r9 16
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L22
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2			/* back to the real source position */
	br	.L_copy_by_byte


.L_dest_aligned_but_src_not_aligned_3bytes:
	cmplti	r4, 16
	bf	.L31
.L30:					/* less than 16 bytes left */
	GET_FRONT_BITS r1 24		/* last byte of the previous word */
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6			/* keep the new word for the next round */
	GET_AFTER_BITS r6 8		/* first 3 bytes of the new word */
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1			/* back to the real source position */
	br	.L_copy_by_byte
.L31:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L32:					/* 16 bytes per iteration */
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 24
	mov	r10, r5
	GET_AFTER_BITS r5 8
	or	r5, r1

	GET_FRONT_BITS r10 24
	mov	r1, r11
	GET_AFTER_BITS r11 8
	or	r11, r10

	GET_FRONT_BITS r1 24
	mov	r10, r8
	GET_AFTER_BITS r8 8
	or	r8, r1

	GET_FRONT_BITS r10 24
	mov	r1, r9			/* r1 = last loaded word, unshifted */
	GET_AFTER_BITS r9 8
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L32
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1			/* back to the real source position */
	br	.L_copy_by_byte