Revert "sh: Handle calling csum_partial with misaligned data"
arch/sh/lib/checksum.S
/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *		converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 */

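/*
 * For orientation, a rough C sketch of what this routine computes.
 * This is an illustration only (the helper name is made up, and a
 * 2-byte-aligned buffer is assumed); the assembly below sums 32-bit
 * words rather than 16-bit ones, which yields the same result once
 * the sum is folded down to 16 bits:
 *
 *	unsigned int sketch_csum_partial(const unsigned char *buf,
 *					 int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len > 1) {		// add 16-bit chunks
 *			acc += *(const unsigned short *)buf;
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len) {			// trailing byte
 *	#ifdef __LITTLE_ENDIAN__
 *			acc += *buf;		// low half of its slot
 *	#else
 *			acc += (unsigned int)*buf << 8;
 *	#endif
 *		}
 *		while (acc >> 32)		// end-around carry fold
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */
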
.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte
	 * aligned.  Fortunately, it is easy to convert 2-byte alignment
	 * to 4-byte alignment for the unrolled loop.
	 */
	mov	r5, r1
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6
2:
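	! The mov #-5 / shld pair computes r5 = len / 32, the number of
	! 32-byte blocks the unrolled loop below consumes per pass.  In
	! the loop, movt saves the carry from the last addc into r0, dt
	! decrements the counter (clobbering T), and the cmp/eq #1 in the
	! branch delay slot re-creates the saved carry for the next addc.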
	mov	#-5, r0
	shld	r0, r5
	tst	r5, r5
	bt/s	4f		! if it's =0, go to 4f
	 clrt
	.align	2
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0
	dt	r5
	bf/s	3b
	 cmp/eq	#1, r0
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0
	tst	r0, r0
	bt/s	6f
	 mov	r0, r5
	shlr2	r5
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5==0 here, so it means add carry-bit
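	! Trailing 1-3 bytes: add a 16-bit word if at least two bytes
	! remain, then a final single byte.  On big-endian the lone byte
	! belongs in the high half of its 16-bit slot, hence the shll8.
	! The closing addc of zero at label 8 folds in the last carry.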
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	 clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	rts
	 mov	r6, r0

/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst,
 *					  int len);
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial
 * with the initial sum being ~0U.
 */

#define EXC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

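/*
 * Each memory access that may fault on a bad user pointer is wrapped
 * in EXC().  The macro plants the instruction at local label 9999 and
 * records a (9999b, 6001f) pair in the __ex_table section, so the
 * kernel's fault handler can resume at the fixup code at label 6001
 * near the end of this file.
 */
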
!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
!
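! r7 accumulates the checksum, seeded with ~0U (mov #-1,r7 below).
! On return, r0 holds the 32-bit partial checksum; if any EXC-wrapped
! access faulted, the fixup at 6001 returns 0 instead.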
ENTRY(csum_partial_copy_generic)
	mov	#-1,r7
	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6
	tst	r6,r6
	bt	4f
	clrt
	.align	2
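	! Copy two bytes per pass (r6 was pre-shifted to len/2), merge
	! them into one 16-bit value in the CPU's byte order, and addc
	! the result into r7, preserving the carry across the dt
	! decrement with the same movt/cmp trick as in csum_partial.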
5:
EXC(	mov.b	@r4+,r1		)
EXC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
EXC(	mov.b	r1,@r5		)
EXC(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0
	bt	7f
	bra	5f
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
EXC(	mov.w	@r4+,r0		)
EXC(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
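	! Main loop: checksum and copy 32 bytes per iteration.  r6 is
	! divided by 32 to count blocks (r2 keeps the full length for
	! the tail), and every load and store is EXC-wrapped so that a
	! faulting access lands in the fixup handler.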
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@r5		)
EXC(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(8,r5)	)
EXC(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(16,r5)	)
EXC(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(24,r5)	)
EXC(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6
3:
EXC(	mov.l	@r4+,r0		)
	addc	r0,r7
EXC(	mov.l	r0,@r5		)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
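	! Tail: the final 0-3 bytes, mirroring the end of csum_partial
	! above: a 16-bit word first if two or more bytes remain, then a
	! lone byte with the big-endian shll8 adjustment, copying each
	! piece to the destination as it is summed.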
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f
EXC(	mov.w	@r4+,r0		)
EXC(	mov.w	r0,@r5		)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f
	 clrt
	shll16	r0
	addc	r0,r7
5:
EXC(	mov.b	@r4+,r0		)
EXC(	mov.b	r0,@r5		)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:

# Exception handler:
.section .fixup, "ax"

6001:
	rts
	 mov	#0,r0
.previous
	rts
	 mov	r7,r0