Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/lib/csumpartialcopygeneric.S | |
3 | * | |
4 | * Copyright (C) 1995-2001 Russell King | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
10 | ||
11 | /* | |
12 | * unsigned int | |
13 | * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) | |
14 | * r0 = src, r1 = dst, r2 = len, r3 = sum | |
15 | * Returns : r0 = checksum | |
16 | * | |
17 | * Note that 'tst' and 'teq' preserve the carry flag. | |
18 | */ | |
19 | ||
20 | src .req r0 /* source pointer */ | |
21 | dst .req r1 /* destination pointer */ | |
22 | len .req r2 /* number of bytes still to copy */ | |
23 | sum .req r3 /* running checksum accumulator */ | |
24 | ||
25 | .zero: mov r0, sum /* zero-length request (reached from .less8): the result is the incoming sum */ | |
26 | load_regs ea /* macro defined outside this view; presumably restores the saved registers and returns - confirm */ | |
27 | ||
28 | /* | |
29 | * Align an unaligned destination pointer. We know that | |
30 | * we have >= 8 bytes here, so we don't need to check | |
31 | * the length. Note that the source pointer hasn't been | |
32 | * aligned yet. This is a subroutine: it is entered with | |
33 | * 'blne' from the entry code and returns by moving lr into pc. */ | |
34 | .dst_unaligned: tst dst, #1 /* is the destination address odd? */ | |
35 | beq .dst_16bit /* no: dst is even but not a multiple of 4, so copy two bytes */ | |
36 | ||
37 | load1b ip /* load1b is defined elsewhere; presumably fetches one byte from src and advances src - confirm */ | |
38 | sub len, len, #1 /* plain sub (no 's' suffix), so the carry being chained through adcs is untouched */ | |
39 | adcs sum, sum, ip, put_byte_1 @ update checksum | |
40 | strb ip, [dst], #1 /* store the byte; dst is now even */ | |
41 | tst dst, #2 /* bit 1 clear means dst is already a multiple of 4 */ | |
42 | moveq pc, lr @ dst is now 32bit aligned | |
43 | ||
44 | .dst_16bit: load2b r8, ip /* load2b is defined elsewhere; presumably fetches two bytes from src into r8 and ip - confirm */ | |
45 | sub len, len, #2 | |
46 | adcs sum, sum, r8, put_byte_0 /* add the first byte of the pair */ | |
47 | strb r8, [dst], #1 | |
48 | adcs sum, sum, ip, put_byte_1 /* add the second byte of the pair */ | |
49 | strb ip, [dst], #1 | |
50 | mov pc, lr @ dst is now 32bit aligned | |
51 | ||
52 | /* | |
53 | * Handle 0 to 7 bytes, with any alignment of source and | |
54 | * destination pointers. Note that when we get here, C = 0 | |
55 | * (we arrive via 'blo' after 'cmp len, #8', which is only taken with carry clear). */ | |
56 | .less8: teq len, #0 @ check for zero count | |
57 | beq .zero | |
58 | ||
59 | /* we must have at least one byte. */ | |
60 | tst dst, #1 @ dst 16-bit aligned | |
61 | beq .less8_aligned | |
62 | ||
63 | /* Align dst */ | |
64 | load1b ip /* load1b is defined elsewhere; presumably fetches one byte from src - confirm */ | |
65 | sub len, len, #1 | |
66 | adcs sum, sum, ip, put_byte_1 @ update checksum | |
67 | strb ip, [dst], #1 | |
68 | tst len, #6 /* any byte pairs left? len is below 8, so bits 1-2 hold the pair count */ | |
69 | beq .less8_byteonly | |
70 | ||
71 | 1: load2b r8, ip /* copy and sum two bytes per iteration */ | |
72 | sub len, len, #2 /* plain sub keeps the carry produced by adcs alive */ | |
73 | adcs sum, sum, r8, put_byte_0 | |
74 | strb r8, [dst], #1 | |
75 | adcs sum, sum, ip, put_byte_1 | |
76 | strb ip, [dst], #1 | |
77 | .less8_aligned: tst len, #6 /* loop until fewer than two bytes remain */ | |
78 | bne 1b | |
79 | .less8_byteonly: | |
80 | tst len, #1 /* one odd byte left over? */ | |
81 | beq .done | |
82 | load1b r8 | |
83 | adcs sum, sum, r8, put_byte_0 @ update checksum | |
84 | strb r8, [dst], #1 | |
85 | b .done /* common exit: fold the carry and fix up for an odd dst */ | |
86 | ||
87 | FN_ENTRY /* macro supplied from outside this view; presumably emits the function's entry label - confirm */ | |
88 | mov ip, sp /* keep the sp value seen on entry */ | |
89 | save_regs /* macro defined elsewhere; .done reloads dst from [sp, #0], so it presumably stores the argument registers - confirm */ | |
90 | sub fp, ip, #4 /* fp = entry sp - 4 */ | |
91 | ||
92 | cmp len, #8 @ Ensure that we have at least | |
93 | blo .less8 @ 8 bytes to copy. | |
94 | ||
95 | adds sum, sum, #0 @ C = 0 | |
96 | tst dst, #3 @ Test destination alignment | |
97 | blne .dst_unaligned @ align destination, return here | |
98 | ||
99 | /* | |
100 | * Ok, the dst pointer is now 32bit aligned, and we know | |
101 | * that we must have more than 4 bytes to copy. Note | |
102 | * that C contains the carry from the dst alignment above. | |
103 | */ | |
104 | ||
105 | tst src, #3 @ Test source alignment | |
106 | bne .src_not_aligned | |
107 | ||
108 | /* Routine for src & dst aligned */ | |
109 | ||
110 | bics ip, len, #15 /* ip = len rounded down to a multiple of 16 */ | |
111 | beq 2f /* fewer than 16 bytes: skip the block loop */ | |
112 | ||
113 | 1: load4l r4, r5, r6, r7 /* load4l is defined elsewhere; presumably reads four words from src - confirm */ | |
114 | stmia dst!, {r4, r5, r6, r7} /* store 16 bytes and advance dst */ | |
115 | adcs sum, sum, r4 /* add the four words, chaining the carry */ | |
116 | adcs sum, sum, r5 | |
117 | adcs sum, sum, r6 | |
118 | adcs sum, sum, r7 | |
119 | sub ip, ip, #16 /* sub followed by teq (not subs) so the carry from adcs survives into the next pass */ | |
120 | teq ip, #0 | |
121 | bne 1b | |
122 | ||
123 | 2: ands ip, len, #12 /* ip = the 8-byte and 4-byte pieces still to copy */ | |
124 | beq 4f | |
125 | tst ip, #8 | |
126 | beq 3f /* no 8-byte piece, so exactly one word remains */ | |
127 | load2l r4, r5 /* load2l is defined elsewhere; presumably reads two words from src - confirm */ | |
128 | stmia dst!, {r4, r5} | |
129 | adcs sum, sum, r4 | |
130 | adcs sum, sum, r5 | |
131 | tst ip, #4 | |
132 | beq 4f | |
133 | ||
134 | 3: load1l r4 /* load1l is defined elsewhere; presumably reads one word from src - confirm */ | |
135 | str r4, [dst], #4 | |
136 | adcs sum, sum, r4 | |
137 | ||
138 | 4: ands len, len, #3 /* 1 to 3 trailing bytes left? */ | |
139 | beq .done | |
140 | load1l r4 /* fetch one more word; only its first len bytes are used below */ | |
141 | tst len, #2 /* result is consumed by the beq two lines down (the mov in between does not set flags) */ | |
142 | mov r5, r4, get_byte_0 /* r5 = first trailing byte; get_byte_0 is defined elsewhere */ | |
143 | beq .exit /* a single byte remains */ | |
144 | adcs sum, sum, r4, push #16 /* add the first two bytes; push is a shift macro defined elsewhere, presumably dropping the unused half - confirm */ | |
145 | strb r5, [dst], #1 | |
146 | mov r5, r4, get_byte_1 | |
147 | strb r5, [dst], #1 | |
148 | mov r5, r4, get_byte_2 /* possible third byte, used by .exit only when len is odd */ | |
149 | .exit: tst len, #1 /* shared tail: r5 carries the final byte in its low 8 bits */ | |
150 | strneb r5, [dst], #1 | |
151 | andne r5, r5, #255 /* keep only the byte that was stored */ | |
152 | adcnes sum, sum, r5, put_byte_0 | |
153 | ||
154 | /* | |
155 | * If the dst pointer was not 16-bit aligned, we | |
156 | * need to rotate the checksum here to get around | |
157 | * the inefficient byte manipulations in the | |
158 | * architecture independent code. | |
159 | */ | |
160 | .done: adc r0, sum, #0 /* fold the last pending carry into the result */ | |
161 | ldr sum, [sp, #0] @ dst | |
162 | tst sum, #1 /* was the reloaded dst value odd? */ | |
163 | movne r0, r0, ror #8 /* yes: rotate the result by one byte */ | |
164 | load_regs ea /* macro defined elsewhere; presumably restores the saved registers and returns - confirm */ | |
165 | ||
166 | .src_not_aligned: /* dst is word aligned, src is not: dispatch on the src offset (1, 2 or 3) */ | |
167 | adc sum, sum, #0 @ include C from dst alignment | |
168 | and ip, src, #3 /* ip = src offset within its word */ | |
169 | bic src, src, #3 /* round src down to a word boundary */ | |
170 | load1l r5 /* load1l is defined elsewhere; presumably reads the aligned word holding the first source bytes - confirm */ | |
171 | cmp ip, #2 | |
172 | beq .src2_aligned /* offset 2 */ | |
173 | bhi .src3_aligned /* offset 3; falling through means offset 1 */ | |
174 | mov r4, r5, pull #8 @ C = 0 | |
175 | bics ip, len, #15 /* ip = len rounded down to a multiple of 16 */ | |
176 | beq 2f | |
177 | 1: load4l r5, r6, r7, r8 /* r4 holds the bytes carried over; push and pull are shift macros defined elsewhere */ | |
178 | orr r4, r4, r5, push #24 /* complete output word 0 with the start of r5 */ | |
179 | mov r5, r5, pull #8 /* the rest of r5 starts output word 1 */ | |
180 | orr r5, r5, r6, push #24 | |
181 | mov r6, r6, pull #8 | |
182 | orr r6, r6, r7, push #24 | |
183 | mov r7, r7, pull #8 | |
184 | orr r7, r7, r8, push #24 | |
185 | stmia dst!, {r4, r5, r6, r7} /* store 16 realigned bytes */ | |
186 | adcs sum, sum, r4 | |
187 | adcs sum, sum, r5 | |
188 | adcs sum, sum, r6 | |
189 | adcs sum, sum, r7 | |
190 | mov r4, r8, pull #8 /* carry the unused part of r8 into the next pass */ | |
191 | sub ip, ip, #16 /* sub followed by teq (not subs) so the carry from adcs is kept */ | |
192 | teq ip, #0 | |
193 | bne 1b | |
194 | 2: ands ip, len, #12 /* 8-byte and 4-byte pieces still to copy */ | |
195 | beq 4f | |
196 | tst ip, #8 | |
197 | beq 3f | |
198 | load2l r5, r6 | |
199 | orr r4, r4, r5, push #24 | |
200 | mov r5, r5, pull #8 | |
201 | orr r5, r5, r6, push #24 | |
202 | stmia dst!, {r4, r5} | |
203 | adcs sum, sum, r4 | |
204 | adcs sum, sum, r5 | |
205 | mov r4, r6, pull #8 | |
206 | tst ip, #4 | |
207 | beq 4f | |
208 | 3: load1l r5 | |
209 | orr r4, r4, r5, push #24 | |
210 | str r4, [dst], #4 | |
211 | adcs sum, sum, r4 | |
212 | mov r4, r5, pull #8 | |
213 | 4: ands len, len, #3 /* 1 to 3 trailing bytes left? */ | |
214 | beq .done | |
215 | mov r5, r4, get_byte_0 /* trailing bytes are taken from the carried-over r4, no further load here */ | |
216 | tst len, #2 | |
217 | beq .exit /* a single byte remains */ | |
218 | adcs sum, sum, r4, push #16 /* add the first two carried-over bytes */ | |
219 | strb r5, [dst], #1 | |
220 | mov r5, r4, get_byte_1 | |
221 | strb r5, [dst], #1 | |
222 | mov r5, r4, get_byte_2 /* possible third byte, used by .exit only when len is odd */ | |
223 | b .exit | |
224 | ||
225 | .src2_aligned: mov r4, r5, pull #16 /* src offset 2: r4 = bytes carried over from the first word; pull is a shift macro defined elsewhere */ | |
226 | adds sum, sum, #0 /* adding zero cannot carry, so this clears C, which the cmp in the dispatch code set */ | |
227 | bics ip, len, #15 /* ip = len rounded down to a multiple of 16 */ | |
228 | beq 2f | |
229 | 1: load4l r5, r6, r7, r8 /* load4l is defined elsewhere; presumably reads four words from src - confirm */ | |
230 | orr r4, r4, r5, push #16 /* complete output word 0 with the start of r5 */ | |
231 | mov r5, r5, pull #16 /* the rest of r5 starts output word 1 */ | |
232 | orr r5, r5, r6, push #16 | |
233 | mov r6, r6, pull #16 | |
234 | orr r6, r6, r7, push #16 | |
235 | mov r7, r7, pull #16 | |
236 | orr r7, r7, r8, push #16 | |
237 | stmia dst!, {r4, r5, r6, r7} /* store 16 realigned bytes */ | |
238 | adcs sum, sum, r4 | |
239 | adcs sum, sum, r5 | |
240 | adcs sum, sum, r6 | |
241 | adcs sum, sum, r7 | |
242 | mov r4, r8, pull #16 /* carry the unused part of r8 into the next pass */ | |
243 | sub ip, ip, #16 /* sub followed by teq (not subs) so the carry from adcs is kept */ | |
244 | teq ip, #0 | |
245 | bne 1b | |
246 | 2: ands ip, len, #12 /* 8-byte and 4-byte pieces still to copy */ | |
247 | beq 4f | |
248 | tst ip, #8 | |
249 | beq 3f | |
250 | load2l r5, r6 | |
251 | orr r4, r4, r5, push #16 | |
252 | mov r5, r5, pull #16 | |
253 | orr r5, r5, r6, push #16 | |
254 | stmia dst!, {r4, r5} | |
255 | adcs sum, sum, r4 | |
256 | adcs sum, sum, r5 | |
257 | mov r4, r6, pull #16 | |
258 | tst ip, #4 | |
259 | beq 4f | |
260 | 3: load1l r5 | |
261 | orr r4, r4, r5, push #16 | |
262 | str r4, [dst], #4 | |
263 | adcs sum, sum, r4 | |
264 | mov r4, r5, pull #16 | |
265 | 4: ands len, len, #3 /* 1 to 3 trailing bytes left? */ | |
266 | beq .done | |
267 | mov r5, r4, get_byte_0 | |
268 | tst len, #2 | |
269 | beq .exit /* a single byte remains and it is already in r5 */ | |
270 | adcs sum, sum, r4 /* add the carried-over bytes held in r4 (presumably exactly two after pull #16 - confirm) */ | |
271 | strb r5, [dst], #1 | |
272 | mov r5, r4, get_byte_1 | |
273 | strb r5, [dst], #1 | |
274 | tst len, #1 /* is there a third byte? */ | |
275 | beq .done | |
276 | load1b r5 /* fetch it from src; load1b is defined elsewhere */ | |
277 | b .exit | |
278 | ||
279 | .src3_aligned: mov r4, r5, pull #24 /* src offset 3: r4 = the part of the first word carried over; pull is a shift macro defined elsewhere */ | |
280 | adds sum, sum, #0 /* adding zero cannot carry, so this clears C, which the cmp in the dispatch code set */ | |
281 | bics ip, len, #15 /* ip = len rounded down to a multiple of 16 */ | |
282 | beq 2f | |
283 | 1: load4l r5, r6, r7, r8 /* load4l is defined elsewhere; presumably reads four words from src - confirm */ | |
284 | orr r4, r4, r5, push #8 /* complete output word 0 with the start of r5 */ | |
285 | mov r5, r5, pull #24 /* the rest of r5 starts output word 1 */ | |
286 | orr r5, r5, r6, push #8 | |
287 | mov r6, r6, pull #24 | |
288 | orr r6, r6, r7, push #8 | |
289 | mov r7, r7, pull #24 | |
290 | orr r7, r7, r8, push #8 | |
291 | stmia dst!, {r4, r5, r6, r7} /* store 16 realigned bytes */ | |
292 | adcs sum, sum, r4 | |
293 | adcs sum, sum, r5 | |
294 | adcs sum, sum, r6 | |
295 | adcs sum, sum, r7 | |
296 | mov r4, r8, pull #24 /* carry the unused part of r8 into the next pass */ | |
297 | sub ip, ip, #16 /* sub followed by teq (not subs) so the carry from adcs is kept */ | |
298 | teq ip, #0 | |
299 | bne 1b | |
300 | 2: ands ip, len, #12 /* 8-byte and 4-byte pieces still to copy */ | |
301 | beq 4f | |
302 | tst ip, #8 | |
303 | beq 3f | |
304 | load2l r5, r6 | |
305 | orr r4, r4, r5, push #8 | |
306 | mov r5, r5, pull #24 | |
307 | orr r5, r5, r6, push #8 | |
308 | stmia dst!, {r4, r5} | |
309 | adcs sum, sum, r4 | |
310 | adcs sum, sum, r5 | |
311 | mov r4, r6, pull #24 | |
312 | tst ip, #4 | |
313 | beq 4f | |
314 | 3: load1l r5 | |
315 | orr r4, r4, r5, push #8 | |
316 | str r4, [dst], #4 | |
317 | adcs sum, sum, r4 | |
318 | mov r4, r5, pull #24 | |
319 | 4: ands len, len, #3 /* 1 to 3 trailing bytes left? */ | |
320 | beq .done | |
321 | mov r5, r4, get_byte_0 | |
322 | tst len, #2 | |
323 | beq .exit /* a single byte remains and it is already in r5 */ | |
324 | strb r5, [dst], #1 /* store the first trailing byte, taken from the carried-over r4 */ | |
325 | adcs sum, sum, r4 /* add r4 (presumably only that one byte after pull #24 - confirm) */ | |
326 | load1l r4 /* the remaining bytes come from the next source word */ | |
327 | mov r5, r4, get_byte_0 | |
328 | strb r5, [dst], #1 /* second trailing byte */ | |
329 | adcs sum, sum, r4, push #24 /* add that byte; push #24 presumably isolates it from the rest of the word - confirm */ | |
330 | mov r5, r4, get_byte_1 /* possible third byte, used by .exit only when len is odd */ | |
331 | b .exit |