Commit | Line | Data |
---|---|---|
4494ce4f KM |
1 | /* SPDX-License-Identifier: GPL-2.0+ |
2 | * | |
3 | * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $ | |
1da177e4 LT |
4 | * |
5 | * INET An implementation of the TCP/IP protocol suite for the LINUX | |
6 | * operating system. INET is implemented using the BSD Socket | |
7 | * interface as the means of communication with the user level. | |
8 | * | |
9 | * IP/TCP/UDP checksumming routines | |
10 | * | |
11 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | |
12 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | |
13 | * Tom May, <ftom@netcom.com> | |
14 | * Pentium Pro/II routines: | |
15 | * Alexander Kjeldaas <astor@guardian.no> | |
16 | * Finn Arne Gangstad <finnag@guardian.no> | |
17 | * Lots of code moved from tcp.c and ip.c; see those files | |
18 | * for more names. | |
19 | * | |
20 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception | |
21 | * handling. | |
22 | * Andi Kleen, add zeroing on error | |
23 | * converted to pure assembler | |
24 | * | |
25 | * SuperH version: Copyright (C) 1999 Niibe Yutaka | |
1da177e4 LT |
26 | */ |
27 | ||
28 | #include <asm/errno.h> | |
29 | #include <linux/linkage.h> | |
30 | ||
/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
 *
 * Register usage:
 *   r4 = buf   (post-incremented as data is consumed)
 *   r5 = len   in bytes (also reused as scratch for the tail)
 *   r6 = running checksum, accumulated with carry-propagating addc
 *   r7 = copy of the original buf pointer; checked at label 9 to see
 *        whether the buffer started on an odd address
 * Returns the accumulated sum in r0.
 */

	.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */
	mov	r4, r0
	tst	#3, r0		! Check alignment.
	bt/s	2f		! Jump if alignment is ok (4-byte aligned).
	mov	r4, r7		! (delay slot) keep a copy to check for alignment
	!
	tst	#1, r0		! Check alignment.
	bt	21f		! Jump if alignment is boundary of 2 bytes.

	! buf is odd
	tst	r5, r5		! T=1 iff len==0 (tested before the decrement)
	add	#-1, r5
	bt	9f		! len was 0: nothing to sum
	mov.b	@r4+, r0
	extu.b	r0, r0
	addc	r0, r6		! t=0 from previous tst
	! Rotate the sum left by 8 ((r6<<8)|(r6>>24)) so the odd leading
	! byte sits in the lane it needs once the rest of the buffer is
	! summed at even alignment; the rotate is redone at label 9.
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0		! r0 = old r6 >> 24
	or	r0, r6
	mov	r4, r0
	tst	#2, r0
	bt	2f		! now 4-byte aligned
21:
	! buf is 2 byte aligned (len could be 0)
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	clrt			! (delay slot) start the addc chain with carry clear
	bra	6f
	add	#2, r5		! (delay slot) r5 was < 2.  Deal with it.
1:
	mov.w	@r4+, r0	! consume the leading halfword
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6		! fold the carry back in
2:
	! buf is 4 byte aligned (len could be 0)
	mov	r5, r1
	mov	#-5, r0
	shld	r0, r1		! r1 = len >> 5 = number of 32-byte iterations
	tst	r1, r1
	bt/s	4f		! if it's =0, go to 4f
	clrt			! (delay slot) clear carry for the addc chain
	.align	2
3:
	! Unrolled main loop: sum 32 bytes per iteration.  dt clobbers the
	! T bit, so movt/cmp/eq save and restore the addc carry across the
	! loop-count decrement.
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0		! save carry (T) before dt destroys it
	dt	r1
	bf/s	3b
	cmp/eq	#1, r0		! (delay slot) restore carry into T
	! here, we know r1==0
	addc	r1, r6		! add carry to r6
4:
	mov	r5, r0
	and	#0x1c, r0	! remaining whole words: 4..28 bytes
	tst	r0, r0
	bt	6f
	! 4 bytes or more remaining
	mov	r0, r1
	shlr2	r1		! r1 = number of remaining 32-bit words
	mov	#0, r2
5:
	addc	r2, r6		! add previous word (0 on first pass) + carry
	mov.l	@r4+, r2
	movt	r0		! save/restore carry around dt, as in loop 3
	dt	r1
	bf/s	5b
	cmp/eq	#1, r0		! (delay slot) restore carry into T
	addc	r2, r6		! add the last word read
	addc	r1, r6		! r1==0 here, so it means add carry-bit
6:
	! 3 bytes or less remaining
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f		! only one byte left
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f		! exactly two bytes left: add as-is
	clrt
	shll16	r0		! three bytes left: halfword goes in the upper 16 bits
	addc	r0, r6
7:
	mov.b	@r4+, r0	! final single byte
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! big-endian: trailing byte occupies the high byte
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6		! fold in the final carry
9:
	! Check if the buffer was misaligned, if so realign sum
	mov	r7, r0
	tst	#1, r0
	bt	10f
	! Buffer started on an odd address: rotate the sum left by 8 again
	! (same rotate as the odd-entry path above) to realign the bytes.
	mov	r6, r0
	shll8	r6
	shlr16	r0
	shlr8	r0
	or	r0, r6
10:
	rts
	mov	r6, r0		! (delay slot) return the accumulated sum
174 | ||
/*
 * unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
 *					    int sum, int *src_err_ptr, int *dst_err_ptr)
 *
 * Copies len bytes from src to dst while accumulating the checksum into
 * sum, returning the result in r0.  On a faulting access the fixup code
 * (labels 6001/6002 below) stores -EFAULT through the relevant error
 * pointer and resumes at the common exit (label 5000).
 */

/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */

/* SRC/DST wrap one memory access and register an __ex_table entry that
 * redirects a fault in that instruction to the source- (6001) or
 * destination- (6002) error fixup handler. */
#define SRC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

#define DST(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6002f	;	\
	.previous

!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
! r7:	int SUM
!
! on stack:
! int *SRC_ERR_PTR
! int *DST_ERR_PTR
!
ENTRY(csum_partial_copy_generic)
	! Save the original dst and len; after these two pushes the
	! stacked arguments sit at @(8,r15) (src_err_ptr) and
	! @(12,r15) (dst_err_ptr), as the fixup handlers expect.
	mov.l	r5,@-r15
	mov.l	r6,@-r15

	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0		! r0 = dst & 3
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok (4-byte aligned).
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	clrt			! (delay slot) clear carry for the addc chain
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	mov	r6,r2		! (delay slot) r2 = saved remaining length

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2		! r2 = original len (tail handled at 4f/5f)
	shlr	r6		! r6 = number of byte pairs
	tst	r6,r6
	bt	4f
	clrt
	.align	2
5:
	! Copy two bytes, then combine them into one halfword for the sum;
	! which byte is shifted high depends on endianness.
SRC(	mov.b	@r4+,r1 	)
SRC(	mov.b	@r4+,r0	)
	extu.b	r1,r1
DST(	mov.b	r1,@r5	)
DST(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0		! save carry (T) before dt destroys it
	dt	r6
	bf/s	5b
	cmp/eq	#1,r0		! (delay slot) restore carry into T
	mov	#0,r0
	addc	r0, r7		! fold in the final carry

	mov	r2, r0
	tst	#1, r0		! odd total length: one trailing byte left
	bt	7f
	bra	5f
	clrt			! (delay slot)

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold in the carry
2:
	! Both pointers now 4-byte aligned; r2 keeps the remaining length
	! for the tail passes at 2f/4f below.
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6		! r6 = len >> 5 = 32-byte iterations
	tst	r6,r6
	bt/s	2f
	clrt			! (delay slot) clear carry for the addc chain
	.align	2
1:
	! Unrolled main loop: copy and sum 32 bytes per iteration.
	! movt/cmp/eq preserve the addc carry across dt, as in csum_partial.
SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
DST(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(8,r5)	)
DST(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(16,r5)	)
DST(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

SRC(	mov.l	@r4+,r0	)
SRC(	mov.l	@r4+,r1	)
	addc	r0,r7
DST(	mov.l	r0,@(24,r5)	)
DST(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0		! save carry (T) before dt destroys it
	dt	r6
	bf/s	1b
	cmp/eq	#1,r0		! (delay slot) restore carry into T
	mov	#0,r0
	addc	r0,r7		! fold in the final carry

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6		! remaining whole words: 4..28 bytes
	cmp/pl	r6
	bf/s	4f
	clrt			! (delay slot)
	shlr2	r6		! r6 = word count
3:
SRC(	mov.l	@r4+,r0	)
	addc	r0,r7
DST(	mov.l	r0,@r5	)
	add	#4,r5
	movt	r0		! save/restore carry around dt
	dt	r6
	bf/s	3b
	cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7		! fold in the final carry
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6		! 0..3 trailing bytes
	cmp/pl	r6
	bf	7f		! nothing left: done
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f		! only one byte left
SRC(	mov.w	@r4+,r0	)
DST(	mov.w	r0,@r5	)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f		! exactly two bytes left: add as-is
	clrt
	shll16	r0		! three bytes left: halfword in the upper 16 bits
	addc	r0,r7
5:
SRC(	mov.b	@r4+,r0	)
DST(	mov.b	r0,@r5	)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0		! big-endian: trailing byte occupies the high byte
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7		! fold in the final carry
7:
5000:				! common exit; fixup handlers jump back here

# Exception handler:
.section .fixup, "ax"

6001:
	! Source fault: report -EFAULT through *src_err_ptr.
	mov.l	@(8,r15),r0		! src_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0

	! zero the complete destination - computing the rest
	! is too much work
	mov.l	@(4,r15),r5	! dst (original value pushed on entry)
	mov.l	@r15,r6		! len (original value pushed on entry)
	mov	#0,r7
	! NOTE(review): if len were 0 here, dt would wrap through 0 and this
	! loop would run ~2^32 times — presumably callers guarantee len > 0
	! on this path; verify against the callers.
1:	mov.b	r7,@r5
	dt	r6
	bf/s	1b
	add	#1,r5		! (delay slot)
	mov.l	8000f,r0
	jmp	@r0		! resume at the common exit (5000)
	nop
	.align	2
8000:	.long	5000b

6002:
	! Destination fault: report -EFAULT through *dst_err_ptr.
	mov.l	@(12,r15),r0		! dst_err_ptr
	mov	#-EFAULT,r1
	mov.l	r1,@r0
	mov.l	8001f,r0
	jmp	@r0		! resume at the common exit (5000)
	nop
	.align	2
8001:	.long	5000b

.previous
	add	#8,r15		! drop the two words pushed on entry
	rts
	mov	r7,r0		! (delay slot) return the accumulated sum