Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * INET An implementation of the TCP/IP protocol suite for the LINUX | |
3 | * operating system. INET is implemented using the BSD Socket | |
4 | * interface as the means of communication with the user level. | |
5 | * | |
6 | * IP/TCP/UDP checksumming routines | |
7 | * | |
8 | * Authors: Jorge Cwik, <jorge@laser.satlink.net> | |
9 | * Arnt Gulbrandsen, <agulbra@nvg.unit.no> | |
10 | * Tom May, <ftom@netcom.com> | |
11 | * Pentium Pro/II routines: | |
12 | * Alexander Kjeldaas <astor@guardian.no> | |
13 | * Finn Arne Gangstad <finnag@guardian.no> | |
14 | * Lots of code moved from tcp.c and ip.c; see those files | |
15 | * for more names. | |
16 | * | |
17 | * Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception | |
18 | * handling. | |
19 | * Andi Kleen, add zeroing on error | |
20 | * converted to pure assembler | |
21 | * | |
22 | * This program is free software; you can redistribute it and/or | |
23 | * modify it under the terms of the GNU General Public License | |
24 | * as published by the Free Software Foundation; either version | |
25 | * 2 of the License, or (at your option) any later version. | |
26 | */ | |
27 | ||
00e065ea JB |
28 | #include <linux/linkage.h> |
29 | #include <asm/dwarf2.h> | |
1da177e4 LT |
30 | #include <asm/errno.h> |
31 | ||
32 | /* | |
33 | * computes a partial checksum, e.g. for TCP/UDP fragments | |
34 | */ | |
35 | ||
36 | /* | |
37 | unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | |
38 | */ | |
39 | ||
40 | .text | |
1da177e4 LT |
41 | |
42 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM | |
43 | ||
44 | /* | |
45 | * Experiments with Ethernet and SLIP connections show that buff | |
46 | * is aligned on either a 2-byte or 4-byte boundary. We get at | |
47 | * least a twofold speedup on 486 and Pentium if it is 4-byte aligned. | |
48 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte | |
49 | * alignment for the unrolled loop. | |
50 | */ | |
/*
 * csum_partial, variant assembled when CONFIG_X86_USE_PPRO_CHECKSUM is
 * not defined.
 *
 * Stack arguments (offsets valid after the two pushes below):
 *   12(%esp)  buff  start of the data to sum
 *   16(%esp)  len   number of bytes
 *   20(%esp)  sum   32-bit partial sum to continue from
 * The updated 32-bit partial sum is returned in %eax.
 * %esi and %ebx are saved and restored, %ecx and %edx are overwritten.
 *
 * Layout of the work:
 *   - an odd buff consumes one byte and rotates the sum by 8, which is
 *     balanced by a second rotate at label 7
 *   - a 2-byte aligned buff consumes one 16-bit word
 *   - 32-byte unrolled loop, then whole dwords, then the last 1-3 bytes
 */
ENTRY(csum_partial)
	CFI_STARTPROC
	pushl %esi
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET esi, 0
	pushl %ebx
	CFI_ADJUST_CFA_OFFSET 4
	CFI_REL_OFFSET ebx, 0
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $3, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	testl $1, %esi		# Check alignment.
	jz 10f			# Jump if alignment is boundary of 2bytes.

	# buf is odd
	dec %ecx
	jl 8f			# nothing to sum, return sum as passed in
	movzbl (%esi), %ebx
	/*
	 * The carry out of this add has to be folded back in right here:
	 * the roll below overwrites CF, so a plain adcl would silently drop
	 * the end-around carry (e.g. sum = 0xffffffff, byte = 1).  This is
	 * the same add/adc pair the PentiumII/PPro variant uses.
	 */
	addl %ebx, %eax
	adcl $0, %eax
	roll $8, %eax
	inc %esi
	testl $2, %esi
	jz 2f
10:
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2.  Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	/* %edx keeps the remaining length, %ecx counts 32-byte groups. */
	movl %ecx, %edx
	shrl $5, %ecx
	jz 2f
	testl %esi, %esi	# clears CF ahead of the adcl chain
	/*
	 * dec and lea leave CF alone, so the carry travels from one
	 * iteration to the next and is folded once after the loop.
	 */
1:	movl (%esi), %ebx
	adcl %ebx, %eax
	movl 4(%esi), %ebx
	adcl %ebx, %eax
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi
	dec %ecx
	jne 1b
	adcl $0, %eax
	/* Remaining whole dwords: (len & 0x1c) / 4 of them. */
2:	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx		# This clears CF
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax
	/*
	 * Last 1-3 bytes.  movw and leal do not touch the flags, so the je
	 * still sees the result of the cmpl.  With 3 bytes left the word is
	 * shifted into the upper half of %ecx and the last byte goes in %cl.
	 */
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
	movw (%esi),%cx
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx
5:	movb (%esi),%cl
6:	addl %ecx,%eax
	adcl $0, %eax
7:
	/* Balance the rotate done for an odd buff (original pointer). */
	testl $1, 12(%esp)
	jz 8f
	roll $8, %eax
8:
	popl %ebx
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE ebx
	popl %esi
	CFI_ADJUST_CFA_OFFSET -4
	CFI_RESTORE esi
	ret
	CFI_ENDPROC
ENDPROC(csum_partial)
1da177e4 LT |
144 | |
145 | #else | |
146 | ||
147 | /* Version for PentiumII/PPro */ | |
148 | ||
/*
 * csum_partial, variant assembled when CONFIG_X86_USE_PPRO_CHECKSUM is
 * defined.
 *
 * After the two pushes below the stack arguments sit at 12(%esp) = buff,
 * 16(%esp) = len and 20(%esp) = sum.  The running sum is kept in %eax,
 * which is what is live at ret.  %esi and %ebx are saved and restored,
 * %ecx and %edx are overwritten.
 */
00e065ea JB |
149 | ENTRY(csum_partial) |
150 | CFI_STARTPROC | |
1da177e4 | 151 | pushl %esi |
00e065ea JB |
152 | CFI_ADJUST_CFA_OFFSET 4 |
153 | CFI_REL_OFFSET esi, 0 | |
1da177e4 | 154 | pushl %ebx |
00e065ea JB |
155 | CFI_ADJUST_CFA_OFFSET 4 |
156 | CFI_REL_OFFSET ebx, 0 | |
1da177e4 LT |
157 | movl 20(%esp),%eax # Function arg: unsigned int sum |
158 | movl 16(%esp),%ecx # Function arg: int len | |
159 | movl 12(%esp),%esi # Function arg: const unsigned char *buf | |
160 | ||
/* A buff that is not 4-byte aligned is fixed up at 25 before reaching 10. */
161 | testl $3, %esi | |
162 | jnz 25f | |
/*
 * Main path, %esi is 4-byte aligned here.
 *   %edx = remaining length, kept for the 1-3 byte tail at 50
 *   %ebx = len & 0x7c, bytes in the partial (shorter than 128) group
 *   %ecx = len >> 7, number of complete 128-byte groups
 * %esi is advanced past the partial group so that the negative
 * displacements in the table at 40 address it.  The jump target is
 * 45 minus 3 * (%ebx / 4), one table entry per dword, which relies on
 * every entry of the table assembling to exactly 3 bytes.
 * testl clears CF so the adcl chain starts without a stale carry.
 */
163 | 10: | |
164 | movl %ecx, %edx | |
165 | movl %ecx, %ebx | |
166 | andl $0x7c, %ebx | |
167 | shrl $7, %ecx | |
168 | addl %ebx,%esi | |
169 | shrl $2, %ebx | |
170 | negl %ebx | |
171 | lea 45f(%ebx,%ebx,2), %ebx | |
172 | testl %esi, %esi | |
173 | jmp *%ebx | |
174 | ||
/*
 * Reached from 30 with %ecx already reduced by 2: add one 16-bit word,
 * fold the carry, then re-enter the aligned path.
 */
175 | # Handle 2-byte-aligned regions | |
176 | 20: addw (%esi), %ax | |
177 | lea 2(%esi), %esi | |
178 | adcl $0, %eax | |
179 | jmp 10b | |
/* buff is not 4-byte aligned: tell odd addresses from 2-byte aligned ones. */
180 | 25: | |
181 | testl $1, %esi | |
182 | jz 30f | |
/*
 * Consume one leading byte.  The dec/jl pair leaves with the incoming sum
 * untouched when there is no byte to take.  The rotate by 8 done here is
 * balanced by the rotate at 80, which tests the same address bit.
 */
183 | # buf is odd | |
184 | dec %ecx | |
185 | jl 90f | |
186 | movzbl (%esi), %ebx | |
187 | addl %ebx, %eax | |
188 | adcl $0, %eax | |
189 | roll $8, %eax | |
190 | inc %esi | |
191 | testl $2, %esi | |
192 | jz 10b | |
193 | ||
/*
 * %esi is 2-byte but not 4-byte aligned here.
 *   more than 2 bytes left: sum one word at 20 and continue aligned
 *   exactly 2 bytes left:   sum them at 32
 *   otherwise:              restore %ecx and sum the single byte, if any
 */
194 | 30: subl $2, %ecx | |
195 | ja 20b | |
196 | je 32f | |
197 | addl $2, %ecx | |
198 | jz 80f | |
199 | movzbl (%esi),%ebx # csumming 1 byte, 2-aligned | |
200 | addl %ebx, %eax | |
201 | adcl $0, %eax | |
202 | jmp 80f | |
203 | 32: | |
204 | addw (%esi), %ax # csumming 2 bytes, 2-aligned | |
205 | adcl $0, %eax | |
206 | jmp 80f | |
207 | ||
/*
 * Unrolled 128-byte group: 32 dword adds chained through CF.  Entered at
 * 40 for a complete group, or part-way through via the computed jump at 10.
 */
208 | 40: | |
209 | addl -128(%esi), %eax | |
210 | adcl -124(%esi), %eax | |
211 | adcl -120(%esi), %eax | |
212 | adcl -116(%esi), %eax | |
213 | adcl -112(%esi), %eax | |
214 | adcl -108(%esi), %eax | |
215 | adcl -104(%esi), %eax | |
216 | adcl -100(%esi), %eax | |
217 | adcl -96(%esi), %eax | |
218 | adcl -92(%esi), %eax | |
219 | adcl -88(%esi), %eax | |
220 | adcl -84(%esi), %eax | |
221 | adcl -80(%esi), %eax | |
222 | adcl -76(%esi), %eax | |
223 | adcl -72(%esi), %eax | |
224 | adcl -68(%esi), %eax | |
225 | adcl -64(%esi), %eax | |
226 | adcl -60(%esi), %eax | |
227 | adcl -56(%esi), %eax | |
228 | adcl -52(%esi), %eax | |
229 | adcl -48(%esi), %eax | |
230 | adcl -44(%esi), %eax | |
231 | adcl -40(%esi), %eax | |
232 | adcl -36(%esi), %eax | |
233 | adcl -32(%esi), %eax | |
234 | adcl -28(%esi), %eax | |
235 | adcl -24(%esi), %eax | |
236 | adcl -20(%esi), %eax | |
237 | adcl -16(%esi), %eax | |
238 | adcl -12(%esi), %eax | |
239 | adcl -8(%esi), %eax | |
240 | adcl -4(%esi), %eax | |
/*
 * lea leaves the flags alone, so the carry of the last adcl is still
 * there for the adcl $0.  Each further pass through 40 handles one
 * complete group; %ecx going negative ends the loop.
 */
241 | 45: | |
242 | lea 128(%esi), %esi | |
243 | adcl $0, %eax | |
244 | dec %ecx | |
245 | jge 40b | |
246 | movl %edx, %ecx | |
247 | 50: andl $3, %ecx | |
248 | jz 80f | |
249 | ||
/*
 * After the lea at 45, %esi is 128 past the first unsummed byte, hence
 * the -128 displacement below.  A whole dword is loaded and masked down
 * to the 1-3 bytes that belong to the buffer.
 */
250 | # Handle the last 1-3 bytes without jumping | |
251 | notl %ecx # 1->2, 2->1, 3->0, higher bits are masked | |
252 | movl $0xffffff,%ebx # by the shll and shrl instructions | |
253 | shll $3,%ecx | |
254 | shrl %cl,%ebx | |
255 | andl -128(%esi),%ebx # esi is 4-aligned so should be ok | |
256 | addl %ebx,%eax | |
257 | adcl $0,%eax | |
/* Balance the rotate done for an odd buff (tests the original pointer). */
258 | 80: | |
259 | testl $1, 12(%esp) | |
260 | jz 90f | |
261 | roll $8, %eax | |
/* Epilogue: restore the saved registers in reverse push order. */
262 | 90: | |
263 | popl %ebx | |
00e065ea JB |
264 | CFI_ADJUST_CFA_OFFSET -4 |
265 | CFI_RESTORE ebx | |
1da177e4 | 266 | popl %esi |
00e065ea JB |
267 | CFI_ADJUST_CFA_OFFSET -4 |
268 | CFI_RESTORE esi | |
1da177e4 | 269 | ret |
00e065ea JB |
270 | CFI_ENDPROC |
271 | ENDPROC(csum_partial) | |
1da177e4 LT |
272 | |
273 | #endif | |
274 | ||
275 | /* | |
276 | unsigned int csum_partial_copy_generic (const char *src, char *dst, | |
277 | int len, int sum, int *src_err_ptr, int *dst_err_ptr) | |
278 | */ | |
279 | ||
280 | /* | |
281 | * Copy from ds while checksumming, otherwise like csum_partial | |
282 | * | |
283 | * The macros SRC and DST specify the type of access for the instruction. | |
284 | * thus we can call a custom exception handler for all access types. | |
285 | * | |
286 | * FIXME: could someone double-check whether I haven't mixed up some SRC and | |
287 | * DST definitions? It's damn hard to trigger all cases. I hope I got | |
288 | * them all but there's no guarantee. | |
289 | */ | |
290 | ||
/*
 * SRC(insn): emit insn at local label 9999 and add an __ex_table entry
 * pairing the address of that instruction with label 6001.  The 6001f
 * form is a forward reference, so each user of the macro has to define
 * a 6001 label (the source-fault fixup) after the last use.
 */
291 | #define SRC(y...) \ | |
292 | 9999: y; \ | |
293 | .section __ex_table, "a"; \ | |
294 | .long 9999b, 6001f ; \ | |
295 | .previous | |
296 | ||
/*
 * DST(insn): same as SRC() but the __ex_table entry points at label 6002,
 * the destination-fault fixup.
 */
297 | #define DST(y...) \ | |
298 | 9999: y; \ | |
299 | .section __ex_table, "a"; \ | |
300 | .long 9999b, 6002f ; \ | |
301 | .previous | |
302 | ||
1da177e4 LT |
303 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM |
304 | ||
/*
 * ARGBASE: offset from %esp to the return address once the prologue
 * below has run (4 bytes of scratch plus three pushed registers), so the
 * first argument is at ARGBASE+4, the second at ARGBASE+8 and so on.
 * FP: offset of the 4-byte scratch slot reserved by the subl, used to
 * park len while %ecx counts loop iterations.
 */
305 | #define ARGBASE 16 | |
306 | #define FP 12 | |
307 | ||
/*
 * csum_partial_copy_generic, variant assembled when
 * CONFIG_X86_USE_PPRO_CHECKSUM is not defined.
 *
 * Copies len bytes from src (%esi) to dst (%edi) while adding them into
 * the sum carried in %eax.  Every load from src is wrapped in SRC() and
 * every store to dst in DST(), so a fault on either one is redirected to
 * the fixup code at 6001 or 6002 further down.
 * %edi, %esi and %ebx are saved and restored, %ecx and %edx are
 * overwritten.
 */
00e065ea JB |
308 | ENTRY(csum_partial_copy_generic) |
309 | CFI_STARTPROC | |
1da177e4 | 310 | subl $4,%esp |
00e065ea | 311 | CFI_ADJUST_CFA_OFFSET 4 |
1da177e4 | 312 | pushl %edi |
00e065ea JB |
313 | CFI_ADJUST_CFA_OFFSET 4 |
314 | CFI_REL_OFFSET edi, 0 | |
1da177e4 | 315 | pushl %esi |
00e065ea JB |
316 | CFI_ADJUST_CFA_OFFSET 4 |
317 | CFI_REL_OFFSET esi, 0 | |
1da177e4 | 318 | pushl %ebx |
00e065ea JB |
319 | CFI_ADJUST_CFA_OFFSET 4 |
320 | CFI_REL_OFFSET ebx, 0 | |
1da177e4 LT |
321 | movl ARGBASE+16(%esp),%eax # sum |
322 | movl ARGBASE+12(%esp),%ecx # len | |
323 | movl ARGBASE+4(%esp),%esi # src | |
324 | movl ARGBASE+8(%esp),%edi # dst | |
325 | ||
/*
 * Only bit 1 of the destination address is examined: when it is set, one
 * 16-bit word is copied and summed first.
 */
326 | testl $2, %edi # Check alignment. | |
327 | jz 2f # Jump if alignment is ok. | |
328 | subl $2, %ecx # Alignment uses up two bytes. | |
329 | jae 1f # Jump if we had at least two bytes. | |
330 | addl $2, %ecx # ecx was < 2. Deal with it. | |
331 | jmp 4f | |
332 | SRC(1: movw (%esi), %bx ) | |
333 | addl $2, %esi | |
334 | DST( movw %bx, (%edi) ) | |
335 | addl $2, %edi | |
336 | addw %bx, %ax | |
337 | adcl $0, %eax | |
/*
 * Main loop: 32 bytes per iteration, len >> 5 iterations.  The remaining
 * length is parked in FP(%esp) because %edx is needed as a second data
 * register.  testl clears CF before the adcl chain.  The mov, lea and dec
 * instructions in the loop do not write CF, so the carry travels from one
 * iteration to the next and is folded by the adcl $0 after the loop.
 */
338 | 2: | |
339 | movl %ecx, FP(%esp) | |
340 | shrl $5, %ecx | |
341 | jz 2f | |
342 | testl %esi, %esi | |
343 | SRC(1: movl (%esi), %ebx ) | |
344 | SRC( movl 4(%esi), %edx ) | |
345 | adcl %ebx, %eax | |
346 | DST( movl %ebx, (%edi) ) | |
347 | adcl %edx, %eax | |
348 | DST( movl %edx, 4(%edi) ) | |
349 | ||
350 | SRC( movl 8(%esi), %ebx ) | |
351 | SRC( movl 12(%esi), %edx ) | |
352 | adcl %ebx, %eax | |
353 | DST( movl %ebx, 8(%edi) ) | |
354 | adcl %edx, %eax | |
355 | DST( movl %edx, 12(%edi) ) | |
356 | ||
357 | SRC( movl 16(%esi), %ebx ) | |
358 | SRC( movl 20(%esi), %edx ) | |
359 | adcl %ebx, %eax | |
360 | DST( movl %ebx, 16(%edi) ) | |
361 | adcl %edx, %eax | |
362 | DST( movl %edx, 20(%edi) ) | |
363 | ||
364 | SRC( movl 24(%esi), %ebx ) | |
365 | SRC( movl 28(%esi), %edx ) | |
366 | adcl %ebx, %eax | |
367 | DST( movl %ebx, 24(%edi) ) | |
368 | adcl %edx, %eax | |
369 | DST( movl %edx, 28(%edi) ) | |
370 | ||
371 | lea 32(%esi), %esi | |
372 | lea 32(%edi), %edi | |
373 | dec %ecx | |
374 | jne 1b | |
375 | adcl $0, %eax | |
/* Remaining whole dwords: (len & 0x1c) / 4 of them, one per iteration. */
376 | 2: movl FP(%esp), %edx | |
377 | movl %edx, %ecx | |
378 | andl $0x1c, %edx | |
379 | je 4f | |
380 | shrl $2, %edx # This clears CF | |
381 | SRC(3: movl (%esi), %ebx ) | |
382 | adcl %ebx, %eax | |
383 | DST( movl %ebx, (%edi) ) | |
384 | lea 4(%esi), %esi | |
385 | lea 4(%edi), %edi | |
386 | dec %edx | |
387 | jne 3b | |
388 | adcl $0, %eax | |
/*
 * Last 1-3 bytes.  The movw and leal instructions do not touch the flags,
 * so the je still sees the result of the cmpl.  With 3 bytes left the
 * word is shifted into the upper half of %ecx and the last byte goes in
 * %cl before the single add.
 */
389 | 4: andl $3, %ecx | |
390 | jz 7f | |
391 | cmpl $2, %ecx | |
392 | jb 5f | |
393 | SRC( movw (%esi), %cx ) | |
394 | leal 2(%esi), %esi | |
395 | DST( movw %cx, (%edi) ) | |
396 | leal 2(%edi), %edi | |
397 | je 6f | |
398 | shll $16,%ecx | |
399 | SRC(5: movb (%esi), %cl ) | |
400 | DST( movb %cl, (%edi) ) | |
401 | 6: addl %ecx, %eax | |
402 | adcl $0, %eax | |
/* Common exit point, both fixup handlers jump back to 5000. */
403 | 7: | |
404 | 5000: | |
405 | ||
406 | # Exception handler: | |
407 | .section .fixup, "ax" | |
408 | ||
/*
 * Fault on a source access: store -EFAULT through src_err_ptr, clear all
 * len bytes of dst and resume at 5000 with %eax zero.
 */
409 | 6001: | |
410 | movl ARGBASE+20(%esp), %ebx # src_err_ptr | |
411 | movl $-EFAULT, (%ebx) | |
412 | ||
413 | # zero the complete destination - computing the rest | |
414 | # is too much work | |
415 | movl ARGBASE+8(%esp), %edi # dst | |
416 | movl ARGBASE+12(%esp), %ecx # len | |
417 | xorl %eax,%eax | |
418 | rep ; stosb | |
419 | ||
420 | jmp 5000b | |
421 | ||
/*
 * Fault on a destination access: store -EFAULT through dst_err_ptr and
 * resume at 5000 with %eax as it was at the faulting instruction.
 */
422 | 6002: | |
423 | movl ARGBASE+24(%esp), %ebx # dst_err_ptr | |
424 | movl $-EFAULT,(%ebx) | |
425 | jmp 5000b | |
426 | ||
427 | .previous | |
428 | ||
/* Epilogue: undo the prologue in reverse order. */
429 | popl %ebx | |
00e065ea JB |
430 | CFI_ADJUST_CFA_OFFSET -4 |
431 | CFI_RESTORE ebx | |
1da177e4 | 432 | popl %esi |
00e065ea JB |
433 | CFI_ADJUST_CFA_OFFSET -4 |
434 | CFI_RESTORE esi | |
1da177e4 | 435 | popl %edi |
00e065ea JB |
436 | CFI_ADJUST_CFA_OFFSET -4 |
437 | CFI_RESTORE edi | |
1da177e4 | 438 | popl %ecx # equivalent to addl $4,%esp |
00e065ea | 439 | CFI_ADJUST_CFA_OFFSET -4 |
1da177e4 | 440 | ret |
00e065ea JB |
441 | CFI_ENDPROC |
442 | ENDPROC(csum_partial_copy_generic) | |
1da177e4 LT |
443 | |
444 | #else | |
445 | ||
446 | /* Version for PentiumII/PPro */ | |
447 | ||
/*
 * ROUND1(x): load the dword at x(%esi), add it to %eax with a plain addl
 * (this starts a new carry chain) and store it to x(%edi).
 */
448 | #define ROUND1(x) \ | |
449 | SRC(movl x(%esi), %ebx ) ; \ | |
450 | addl %ebx, %eax ; \ | |
451 | DST(movl %ebx, x(%edi) ) ; | |
452 | ||
/*
 * ROUND(x): same as ROUND1(x) but with adcl, so the carry of the
 * previous round is included.
 */
453 | #define ROUND(x) \ | |
454 | SRC(movl x(%esi), %ebx ) ; \ | |
455 | adcl %ebx, %eax ; \ | |
456 | DST(movl %ebx, x(%edi) ) ; | |
457 | ||
/*
 * ARGBASE: offset from %esp to the return address after the three pushes
 * of the prologue below, so the first argument is at ARGBASE+4.
 */
458 | #define ARGBASE 12 | |
459 | ||
/*
 * csum_partial_copy_generic, variant assembled when
 * CONFIG_X86_USE_PPRO_CHECKSUM is defined.
 *
 * Copies len bytes from src (%esi) to dst (%edi) while adding them into
 * the sum carried in %eax.  Loads from src go through SRC() and stores to
 * dst through DST(), so a fault is redirected to the fixup code at 6001
 * or 6002 further down.
 * %ebx, %edi and %esi are saved and restored, %ecx and %edx are
 * overwritten.
 */
00e065ea JB |
460 | ENTRY(csum_partial_copy_generic) |
461 | CFI_STARTPROC | |
1da177e4 | 462 | pushl %ebx |
00e065ea JB |
463 | CFI_ADJUST_CFA_OFFSET 4 |
464 | CFI_REL_OFFSET ebx, 0 | |
1da177e4 | 465 | pushl %edi |
00e065ea JB |
466 | CFI_ADJUST_CFA_OFFSET 4 |
467 | CFI_REL_OFFSET edi, 0 | |
1da177e4 | 468 | pushl %esi |
00e065ea JB |
469 | CFI_ADJUST_CFA_OFFSET 4 |
470 | CFI_REL_OFFSET esi, 0 | |
1da177e4 LT |
471 | movl ARGBASE+4(%esp),%esi #src |
472 | movl ARGBASE+8(%esp),%edi #dst | |
473 | movl ARGBASE+12(%esp),%ecx #len | |
474 | movl ARGBASE+16(%esp),%eax #sum | |
/*
 * Set-up for the computed jump into the unrolled rounds.
 *   %ecx = len >> 6, number of complete 64-byte groups
 *   %ebx = -(len & 0x3c), minus the byte count of the partial group
 * Subtracting the negative %ebx advances %esi and %edi past the partial
 * group so the negative offsets of the rounds address it.  The jump
 * target is 3 plus 2 * %ebx, i.e. 8 bytes of code back per dword, which
 * relies on every ROUND expanding to exactly 8 bytes of machine code.
 * The value written to %edx by the first movl is replaced by the lea
 * before anything reads it.  %edx then holds (%esi - 1) rounded down to
 * a multiple of 32 and is only used by the two movb loads at label 1.
 * testl clears CF so that entering in the middle of the adcl chain does
 * not pick up a stale carry.
 */
475 | # movl %ecx, %edx | |
476 | movl %ecx, %ebx | |
477 | movl %esi, %edx | |
478 | shrl $6, %ecx | |
479 | andl $0x3c, %ebx | |
480 | negl %ebx | |
481 | subl %ebx, %esi | |
482 | subl %ebx, %edi | |
483 | lea -1(%esi),%edx | |
484 | andl $-32,%edx | |
485 | lea 3f(%ebx,%ebx), %ebx | |
486 | testl %esi, %esi | |
487 | jmp *%ebx | |
/*
 * One complete 64-byte group per pass.  The two movb loads into %bl are
 * overwritten by the first round without being used.
 * NOTE(review): they look like early touches of upcoming source cache
 * lines, confirm before relying on that.
 */
488 | 1: addl $64,%esi | |
489 | addl $64,%edi | |
490 | SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) | |
491 | ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) | |
492 | ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) | |
493 | ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) | |
494 | ROUND (-16) ROUND(-12) ROUND(-8) ROUND(-4) | |
/* Fold the carry of the chain, then loop until %ecx goes negative. */
495 | 3: adcl $0,%eax | |
496 | addl $64, %edx | |
497 | dec %ecx | |
498 | jge 1b | |
/*
 * Last 1-3 bytes, with len reloaded from the stack.  The movw and leal
 * instructions do not touch the flags, so the je still sees the result
 * of the cmpl.  With 3 bytes left the word is shifted into the upper
 * half of %edx and the last byte goes in %dl before the single add.
 */
499 | 4: movl ARGBASE+12(%esp),%edx #len | |
500 | andl $3, %edx | |
501 | jz 7f | |
502 | cmpl $2, %edx | |
503 | jb 5f | |
504 | SRC( movw (%esi), %dx ) | |
505 | leal 2(%esi), %esi | |
506 | DST( movw %dx, (%edi) ) | |
507 | leal 2(%edi), %edi | |
508 | je 6f | |
509 | shll $16,%edx | |
510 | 5: | |
511 | SRC( movb (%esi), %dl ) | |
512 | DST( movb %dl, (%edi) ) | |
513 | 6: addl %edx, %eax | |
514 | adcl $0, %eax | |
/* Common exit point, both fixup handlers jump back to 7. */
515 | 7: | |
516 | .section .fixup, "ax" | |
/*
 * 6001, fault on a source access: store -EFAULT through src_err_ptr,
 * clear all len bytes of dst and resume at 7 with %eax zero.
 * 6002, fault on a destination access: store -EFAULT through dst_err_ptr
 * and resume at 7 with %eax as it was at the faulting instruction.
 */
517 | 6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr | |
518 | movl $-EFAULT, (%ebx) | |
519 | # zero the complete destination (computing the rest is too much work) | |
520 | movl ARGBASE+8(%esp),%edi # dst | |
521 | movl ARGBASE+12(%esp),%ecx # len | |
522 | xorl %eax,%eax | |
523 | rep; stosb | |
524 | jmp 7b | |
525 | 6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr | |
526 | movl $-EFAULT, (%ebx) | |
527 | jmp 7b | |
528 | .previous | |
529 | ||
/* Epilogue: restore the saved registers in reverse push order. */
530 | popl %esi | |
00e065ea JB |
531 | CFI_ADJUST_CFA_OFFSET -4 |
532 | CFI_RESTORE esi | |
1da177e4 | 533 | popl %edi |
00e065ea JB |
534 | CFI_ADJUST_CFA_OFFSET -4 |
535 | CFI_RESTORE edi | |
1da177e4 | 536 | popl %ebx |
00e065ea JB |
537 | CFI_ADJUST_CFA_OFFSET -4 |
538 | CFI_RESTORE ebx | |
1da177e4 | 539 | ret |
00e065ea JB |
540 | CFI_ENDPROC |
541 | ENDPROC(csum_partial_copy_generic) | |
1da177e4 LT |
542 | |
543 | #undef ROUND | |
544 | #undef ROUND1 | |
545 | ||
546 | #endif |