Revert "sh: Handle calling csum_partial with misaligned data"
arch/sh/lib/checksum.S
/* SPDX-License-Identifier: GPL-2.0+
 *
 * $Id: checksum.S,v 1.10 2001/07/06 13:11:32 gniibe Exp $
 *
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen,  add zeroing on error
 *		converted to pure assembler
 *
 * SuperH version:  Copyright (C) 1999  Niibe Yutaka
 */

#include <asm/errno.h>
#include <linux/linkage.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 */

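/*
 * For orientation, a rough C sketch of what this routine computes.
 * This is an illustration only (the helper name is made up, and a
 * 2-byte-aligned buffer is assumed); the assembly below sums 32-bit
 * words rather than 16-bit ones, which yields the same result once
 * the sum is folded down to 16 bits:
 *
 *	unsigned int sketch_csum_partial(const unsigned char *buf,
 *					 int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		while (len > 1) {		// add 16-bit chunks
 *			acc += *(const unsigned short *)buf;
 *			buf += 2;
 *			len -= 2;
 *		}
 *		if (len) {			// trailing byte
 *	#ifdef __LITTLE_ENDIAN__
 *			acc += *buf;		// low half of its slot
 *	#else
 *			acc += (unsigned int)*buf << 8;
 *	#endif
 *		}
 *		while (acc >> 32)		// end-around carry fold
 *			acc = (acc & 0xffffffffULL) + (acc >> 32);
 *		return (unsigned int)acc;
 *	}
 */
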
.text
ENTRY(csum_partial)
	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte
	 * aligned.  Fortunately, it is easy to convert 2-byte alignment
	 * to 4-byte alignment for the unrolled loop.
	 */
	mov	r5, r1
	mov	r4, r0
	tst	#2, r0		! Check alignment.
	bt	2f		! Jump if alignment is ok.
	!
	add	#-2, r5		! Alignment uses up two bytes.
	cmp/pz	r5		!
	bt/s	1f		! Jump if we had at least two bytes.
	 clrt
	bra	6f
	 add	#2, r5		! r5 was < 2.  Deal with it.
1:
	mov	r5, r1		! Save new len for later use.
	mov.w	@r4+, r0
	extu.w	r0, r0
	addc	r0, r6
	bf	2f
	add	#1, r6
2:
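	! The mov #-5 / shld pair computes r5 = len / 32, the number of
	! 32-byte blocks the unrolled loop below consumes per pass.  In
	! the loop, movt saves the carry from the last addc into r0, dt
	! decrements the counter (clobbering T), and the cmp/eq #1 in the
	! branch delay slot re-creates the saved carry for the next addc.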
	mov	#-5, r0
	shld	r0, r5
	tst	r5, r5
	bt/s	4f		! if it's =0, go to 4f
	 clrt
	.align	2
3:
	mov.l	@r4+, r0
	mov.l	@r4+, r2
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	mov.l	@r4+, r3
	addc	r0, r6
	mov.l	@r4+, r0
	addc	r2, r6
	mov.l	@r4+, r2
	addc	r3, r6
	addc	r0, r6
	addc	r2, r6
	movt	r0
	dt	r5
	bf/s	3b
	 cmp/eq	#1, r0
	! here, we know r5==0
	addc	r5, r6		! add carry to r6
4:
	mov	r1, r0
	and	#0x1c, r0
	tst	r0, r0
	bt/s	6f
	 mov	r0, r5
	shlr2	r5
	mov	#0, r2
5:
	addc	r2, r6
	mov.l	@r4+, r2
	movt	r0
	dt	r5
	bf/s	5b
	 cmp/eq	#1, r0
	addc	r2, r6
	addc	r5, r6		! r5==0 here, so it means add carry-bit
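	! Trailing 1-3 bytes: add a 16-bit word if at least two bytes
	! remain, then a final single byte.  On big-endian the lone byte
	! belongs in the high half of its 16-bit slot, hence the shll8.
	! The closing addc of zero at label 8 folds in the last carry.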
6:
	mov	r1, r5
	mov	#3, r0
	and	r0, r5
	tst	r5, r5
	bt	9f		! if it's =0 go to 9f
	mov	#2, r1
	cmp/hs	r1, r5
	bf	7f
	mov.w	@r4+, r0
	extu.w	r0, r0
	cmp/eq	r1, r5
	bt/s	8f
	 clrt
	shll16	r0
	addc	r0, r6
7:
	mov.b	@r4+, r0
	extu.b	r0, r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
8:
	addc	r0, r6
	mov	#0, r0
	addc	r0, r6
9:
	rts
	 mov	r6, r0

/*
 * unsigned int csum_partial_copy_generic(const char *src, char *dst,
 *					  int len);
 */

/*
 * Copy from src to dst while checksumming, otherwise like csum_partial
 * with the initial sum being ~0U.
 */

#define EXC(...)			\
	9999: __VA_ARGS__ ;		\
	.section __ex_table, "a";	\
	.long 9999b, 6001f	;	\
	.previous

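/*
 * Each memory access that may fault on a bad user pointer is wrapped
 * in EXC().  The macro plants the instruction at local label 9999 and
 * records a (9999b, 6001f) pair in the __ex_table section, so the
 * kernel's fault handler can resume at the fixup code at label 6001
 * near the end of this file.
 */
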
!
! r4:	const char *SRC
! r5:	char *DST
! r6:	int LEN
!
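! r7 accumulates the checksum, seeded with ~0U (mov #-1,r7 below).
! On return, r0 holds the 32-bit partial checksum; if any EXC-wrapped
! access faulted, the fixup at 6001 returns 0 instead.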
ENTRY(csum_partial_copy_generic)
	mov	#-1,r7
	mov	#3,r0		! Check src and dest are equally aligned
	mov	r4,r1
	and	r0,r1
	and	r5,r0
	cmp/eq	r1,r0
	bf	3f		! Different alignments, use slow version
	tst	#1,r0		! Check dest word aligned
	bf	3f		! If not, do it the slow way

	mov	#2,r0
	tst	r0,r5		! Check dest alignment.
	bt	2f		! Jump if alignment is ok.
	add	#-2,r6		! Alignment uses up two bytes.
	cmp/pz	r6		! Jump if we had at least two bytes.
	bt/s	1f
	 clrt
	add	#2,r6		! r6 was < 2.  Deal with it.
	bra	4f
	 mov	r6,r2

3:	! Handle different src and dest alignments.
	! This is not common, so simple byte by byte copy will do.
	mov	r6,r2
	shlr	r6
	tst	r6,r6
	bt	4f
	clrt
	.align	2
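	! Copy two bytes per pass (r6 was pre-shifted to len/2), merge
	! them into one 16-bit value in the CPU's byte order, and addc
	! the result into r7, preserving the carry across the dt
	! decrement with the same movt/cmp trick as in csum_partial.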
5:
EXC(	mov.b	@r4+,r1		)
EXC(	mov.b	@r4+,r0		)
	extu.b	r1,r1
EXC(	mov.b	r1,@r5		)
EXC(	mov.b	r0,@(1,r5)	)
	extu.b	r0,r0
	add	#2,r5

#ifdef	__LITTLE_ENDIAN__
	shll8	r0
#else
	shll8	r1
#endif
	or	r1,r0

	addc	r0,r7
	movt	r0
	dt	r6
	bf/s	5b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0, r7

	mov	r2, r0
	tst	#1, r0
	bt	7f
	bra	5f
	 clrt

	! src and dest equally aligned, but to a two byte boundary.
	! Handle first two bytes as a special case
	.align	2
1:
EXC(	mov.w	@r4+,r0		)
EXC(	mov.w	r0,@r5		)
	add	#2,r5
	extu.w	r0,r0
	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
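	! Main loop: checksum and copy 32 bytes per iteration.  r6 is
	! divided by 32 to count blocks (r2 keeps the full length for
	! the tail), and every load and store is EXC-wrapped so that a
	! faulting access lands in the fixup handler.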
2:
	mov	r6,r2
	mov	#-5,r0
	shld	r0,r6
	tst	r6,r6
	bt/s	2f
	 clrt
	.align	2
1:
EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@r5		)
EXC(	mov.l	r1,@(4,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(8,r5)	)
EXC(	mov.l	r1,@(12,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(16,r5)	)
EXC(	mov.l	r1,@(20,r5)	)
	addc	r1,r7

EXC(	mov.l	@r4+,r0		)
EXC(	mov.l	@r4+,r1		)
	addc	r0,r7
EXC(	mov.l	r0,@(24,r5)	)
EXC(	mov.l	r1,@(28,r5)	)
	addc	r1,r7
	add	#32,r5
	movt	r0
	dt	r6
	bf/s	1b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7

2:	mov	r2,r6
	mov	#0x1c,r0
	and	r0,r6
	cmp/pl	r6
	bf/s	4f
	 clrt
	shlr2	r6
3:
EXC(	mov.l	@r4+,r0		)
	addc	r0,r7
EXC(	mov.l	r0,@r5		)
	add	#4,r5
	movt	r0
	dt	r6
	bf/s	3b
	 cmp/eq	#1,r0
	mov	#0,r0
	addc	r0,r7
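	! Tail: the final 0-3 bytes, mirroring the end of csum_partial
	! above: a 16-bit word first if two or more bytes remain, then a
	! lone byte with the big-endian shll8 adjustment, copying each
	! piece to the destination as it is summed.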
4:	mov	r2,r6
	mov	#3,r0
	and	r0,r6
	cmp/pl	r6
	bf	7f
	mov	#2,r1
	cmp/hs	r1,r6
	bf	5f
EXC(	mov.w	@r4+,r0		)
EXC(	mov.w	r0,@r5		)
	extu.w	r0,r0
	add	#2,r5
	cmp/eq	r1,r6
	bt/s	6f
	 clrt
	shll16	r0
	addc	r0,r7
5:
EXC(	mov.b	@r4+,r0		)
EXC(	mov.b	r0,@r5		)
	extu.b	r0,r0
#ifndef	__LITTLE_ENDIAN__
	shll8	r0
#endif
6:	addc	r0,r7
	mov	#0,r0
	addc	r0,r7
7:

# Exception handler:
.section .fixup, "ax"

6001:
	rts
	 mov	#0,r0
.previous
	rts
	 mov	r7,r0