/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              IP/TCP/UDP checksumming routines
 *
 * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Tom May, <ftom@netcom.com>
 *              Pentium Pro/II routines:
 *              Alexander Kjeldaas <astor@guardian.no>
 *              Finn Arne Gangstad <finnag@guardian.no>
 *              Lots of code moved from tcp.c and ip.c; see those files
 *              for more names.
 *
 * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *              handling.
 *              Andi Kleen, add zeroing on error
 *              converted to pure assembler
 *
 * SuperH version: Copyright (C) 1999 Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *                           unsigned int sum);
 */

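/*
 * For reference, a plain C sketch of the kind of sum being computed (an
 * illustrative assumption about intent, not the code the routine below
 * generates; results agree up to the final 16-bit fold): keep a wide
 * running sum of the buffer and fold every carry back in.
 *
 *	unsigned int csum_partial_sketch(const unsigned char *buf, int len,
 *	                                 unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len >= 2) {
 *			acc += *(const unsigned short *)buf;
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len) {
 *	#ifdef __LITTLE_ENDIAN__
 *			acc += *buf;
 *	#else
 *			acc += (unsigned int)*buf << 8;
 *	#endif
 *		}
 *		acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 *
 * The assembly below performs the same sums, but reads 32 bits at a time
 * and unrolls the inner loop once the pointer is 4-byte aligned; the
 * caller still folds the 32-bit result down to 16 bits (csum_fold).
 */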
        .text
ENTRY(csum_partial)
        /*
         * Experiments with Ethernet and SLIP connections show that buff
         * is aligned on either a 2-byte or 4-byte boundary.  We get at
         * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
         * Fortunately, it is easy to convert 2-byte alignment to 4-byte
         * alignment for the unrolled loop.
         */
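        /*
         * Register use (per the SH C calling convention): r4 = buf,
         * r5 = len, r6 = sum on entry; the 32-bit result is returned in r0.
         */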
        mov     r5, r1
        mov     r4, r0
        tst     #2, r0          ! Check alignment.
        bt      2f              ! Jump if alignment is ok.
        !
        add     #-2, r5         ! Alignment uses up two bytes.
        cmp/pz  r5              !
        bt/s    1f              ! Jump if we had at least two bytes.
        clrt
        bra     6f
        add     #2, r5          ! r5 was < 2.  Deal with it.
1:
        mov     r5, r1          ! Save new len for later use.
        mov.w   @r4+, r0
        extu.w  r0, r0
        addc    r0, r6
        bf      2f
        add     #1, r6
2:
        mov     #-5, r0
        shld    r0, r5
        tst     r5, r5
        bt/s    4f              ! if it's =0, go to 4f
        clrt
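        ! Main loop: each pass consumes 8 longwords (32 bytes), accumulating
        ! into r6 with addc; r5 holds len >> 5, the 32-byte block count.
        ! The movt/cmp pair below preserves the carry across dt, which would
        ! otherwise clobber the T bit.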
        .align  2
3:
        mov.l   @r4+, r0
        mov.l   @r4+, r2
        mov.l   @r4+, r3
        addc    r0, r6
        mov.l   @r4+, r0
        addc    r2, r6
        mov.l   @r4+, r2
        addc    r3, r6
        mov.l   @r4+, r3
        addc    r0, r6
        mov.l   @r4+, r0
        addc    r2, r6
        mov.l   @r4+, r2
        addc    r3, r6
        addc    r0, r6
        addc    r2, r6
        movt    r0
        dt      r5
        bf/s    3b
        cmp/eq  #1, r0
        ! here, we know r5==0
        addc    r5, r6          ! add carry to r6
4:
        mov     r1, r0
        and     #0x1c, r0
        tst     r0, r0
        bt/s    6f
        mov     r0, r5
        shlr2   r5
        mov     #0, r2
5:
        addc    r2, r6
        mov.l   @r4+, r2
        movt    r0
        dt      r5
        bf/s    5b
        cmp/eq  #1, r0
        addc    r2, r6
        addc    r5, r6          ! r5==0 here, so it means add carry-bit
6:
        mov     r1, r5
        mov     #3, r0
        and     r0, r5
        tst     r5, r5
        bt      9f              ! if it's =0 go to 9f
        mov     #2, r1
        cmp/hs  r1, r5
        bf      7f
        mov.w   @r4+, r0
        extu.w  r0, r0
        cmp/eq  r1, r5
        bt/s    8f
        clrt
        shll16  r0
        addc    r0, r6
7:
        mov.b   @r4+, r0
        extu.b  r0, r0
#ifndef __LITTLE_ENDIAN__
        shll8   r0
#endif
8:
        addc    r0, r6
        mov     #0, r0
        addc    r0, r6
9:
        rts
        mov     r6, r0

/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst, int len)
 */

/*
 * Copy src to dst while checksumming, otherwise like csum_partial with
 * initial sum being ~0U
 */

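/*
 * In C terms the intent is roughly the following (illustrative sketch only;
 * the real routine copies and checksums in a single pass):
 *
 *	unsigned int csum_partial_copy_generic(const char *src, char *dst,
 *	                                       int len)
 *	{
 *		memcpy(dst, src, len);
 *		return csum_partial(dst, len, ~0U);
 *	}
 *
 * with the additional twist that every access which may fault is wrapped
 * in EXC() below, so a fault makes the routine return 0 instead of oopsing.
 */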
#define EXC(...)                        \
        9999: __VA_ARGS__ ;             \
        .section __ex_table, "a";       \
        .long 9999b, 6001f ;            \
        .previous

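/*
 * EXC() wraps a single load or store that may fault: the 9999 local label
 * marks the instruction, and the __ex_table entry (fault address, fixup
 * address) sends the exception handler to the 6001 fixup at the end of
 * this file, which returns 0.
 */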
!
! r4:   const char *SRC
! r5:   char *DST
! r6:   int LEN
!
ENTRY(csum_partial_copy_generic)
        mov     #-1,r7
        mov     #3,r0           ! Check src and dest are equally aligned
        mov     r4,r1
        and     r0,r1
        and     r5,r0
        cmp/eq  r1,r0
        bf      3f              ! Different alignments, use slow version
        tst     #1,r0           ! Check dest word aligned
        bf      3f              ! If not, do it the slow way

        mov     #2,r0
        tst     r0,r5           ! Check dest alignment.
        bt      2f              ! Jump if alignment is ok.
        add     #-2,r6          ! Alignment uses up two bytes.
        cmp/pz  r6              ! Jump if we had at least two bytes.
        bt/s    1f
        clrt
        add     #2,r6           ! r6 was < 2.  Deal with it.
        bra     4f
        mov     r6,r2

3:      ! Handle different src and dest alignments.
        ! This is not common, so simple byte by byte copy will do.
        mov     r6,r2
        shlr    r6
        tst     r6,r6
        bt      4f
        clrt
        .align  2
5:
EXC(    mov.b   @r4+,r1         )
EXC(    mov.b   @r4+,r0         )
        extu.b  r1,r1
EXC(    mov.b   r1,@r5          )
EXC(    mov.b   r0,@(1,r5)      )
        extu.b  r0,r0
        add     #2,r5

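        ! Combine the two bytes just copied into the 16-bit value they form
        ! in memory (which byte is the high half depends on endianness), so
        ! the running sum matches what the word-sized paths compute.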
#ifdef __LITTLE_ENDIAN__
        shll8   r0
#else
        shll8   r1
#endif
        or      r1,r0

        addc    r0,r7
        movt    r0
        dt      r6
        bf/s    5b
        cmp/eq  #1,r0
        mov     #0,r0
        addc    r0, r7

        mov     r2, r0
        tst     #1, r0
        bt      7f
        bra     5f
        clrt

        ! src and dest equally aligned, but to a two byte boundary.
        ! Handle first two bytes as a special case
        .align  2
1:
EXC(    mov.w   @r4+,r0         )
EXC(    mov.w   r0,@r5          )
        add     #2,r5
        extu.w  r0,r0
        addc    r0,r7
        mov     #0,r0
        addc    r0,r7
2:
        mov     r6,r2
        mov     #-5,r0
        shld    r0,r6
        tst     r6,r6
        bt/s    2f
        clrt
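        ! Main copy loop: each pass moves and checksums 8 longwords
        ! (32 bytes); r6 was shifted down to the 32-byte block count above.
        ! Both the loads from src and the stores to dst go through EXC().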
        .align  2
1:
EXC(    mov.l   @r4+,r0         )
EXC(    mov.l   @r4+,r1         )
        addc    r0,r7
EXC(    mov.l   r0,@r5          )
EXC(    mov.l   r1,@(4,r5)      )
        addc    r1,r7

EXC(    mov.l   @r4+,r0         )
EXC(    mov.l   @r4+,r1         )
        addc    r0,r7
EXC(    mov.l   r0,@(8,r5)      )
EXC(    mov.l   r1,@(12,r5)     )
        addc    r1,r7

EXC(    mov.l   @r4+,r0         )
EXC(    mov.l   @r4+,r1         )
        addc    r0,r7
EXC(    mov.l   r0,@(16,r5)     )
EXC(    mov.l   r1,@(20,r5)     )
        addc    r1,r7

EXC(    mov.l   @r4+,r0         )
EXC(    mov.l   @r4+,r1         )
        addc    r0,r7
EXC(    mov.l   r0,@(24,r5)     )
EXC(    mov.l   r1,@(28,r5)     )
        addc    r1,r7
        add     #32,r5
        movt    r0
        dt      r6
        bf/s    1b
        cmp/eq  #1,r0
        mov     #0,r0
        addc    r0,r7

2:      mov     r2,r6
        mov     #0x1c,r0
        and     r0,r6
        cmp/pl  r6
        bf/s    4f
        clrt
        shlr2   r6
3:
EXC(    mov.l   @r4+,r0 )
        addc    r0,r7
EXC(    mov.l   r0,@r5  )
        add     #4,r5
        movt    r0
        dt      r6
        bf/s    3b
        cmp/eq  #1,r0
        mov     #0,r0
        addc    r0,r7
4:      mov     r2,r6
        mov     #3,r0
        and     r0,r6
        cmp/pl  r6
        bf      7f
        mov     #2,r1
        cmp/hs  r1,r6
        bf      5f
EXC(    mov.w   @r4+,r0 )
EXC(    mov.w   r0,@r5  )
        extu.w  r0,r0
        add     #2,r5
        cmp/eq  r1,r6
        bt/s    6f
        clrt
        shll16  r0
        addc    r0,r7
5:
EXC(    mov.b   @r4+,r0 )
EXC(    mov.b   r0,@r5  )
        extu.b  r0,r0
#ifndef __LITTLE_ENDIAN__
        shll8   r0
#endif
6:      addc    r0,r7
        mov     #0,r0
        addc    r0,r7
7:

# Exception handler:
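# A fault in any EXC()-wrapped access is redirected here through its
# __ex_table entry; the routine then returns 0.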
.section .fixup, "ax"

6001:
        rts
        mov     #0,r0
        .previous
        rts
        mov     r7,r0