# NOTE(review): extraction residue (git-blame table header) removed;
# the whole file was last blamed to commit b920de1b (DH).
/* Optimised simple memory checksum
 *
 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public Licence
 * as published by the Free Software Foundation; either version
 * 2 of the Licence, or (at your option) any later version.
 */
#include <asm/cache.h>

	.section .text
	.balign	L1_CACHE_BYTES		# start the routine on a cache-line boundary

###############################################################################
#
# unsigned int do_csum(const unsigned char *buff, size_t len)
#
###############################################################################
	.globl	do_csum
	.type	do_csum,@function
do_csum:
	# Register roles:
	#   a0 = buff (current read pointer)
	#   d2 = len  (bytes remaining)
	#   d1 = 32-bit ones-complement accumulator
	#   d0/e0/e1/e3 = scratch for loaded words
	# movm/ret below save and restore d2,d3 as a register list
	# (NOTE(review): presumably callee-saved under the MN10300 ABI — confirm)
	movm	[d2,d3],(sp)
	mov	d0,(12,sp)		# spill buff; reloaded at the end to test odd alignment
	mov	d1,(16,sp)		# spill len (not reloaded in this routine)
	mov	d1,d2			# count
	mov	d0,a0			# buff
	clr	d1			# accumulator

	cmp	+0,d2
	beq	do_csum_done		# return if zero-length buffer

	# 4-byte align the buffer pointer
	btst	+3,a0
	beq	do_csum_now_4b_aligned

	btst	+1,a0
	beq	do_csum_addr_not_odd
	movbu	(a0),d0			# consume one byte to make the pointer even
	inc	a0
	asl	+8,d0			# odd leading byte sits in the high half of its
					# 16-bit lane; the final swaph compensates
	add	d0,d1
	addc	+0,d1			# fold carry back in (ones-complement add)
	add	-1,d2
do_csum_addr_not_odd:

	cmp	+2,d2
	bcs	do_csum_fewer_than_4	# fewer than 2 bytes left: finish as tail
	btst	+2,a0
	beq	do_csum_now_4b_aligned
	movhu	(a0+),d0		# consume one halfword to reach 4-byte alignment
	add	d0,d1
	addc	+0,d1
	add	-2,d2
	cmp	+4,d2
	bcs	do_csum_fewer_than_4

do_csum_now_4b_aligned:
	# we want to checksum as much as we can in chunks of 32 bytes
	cmp	+31,d2
	bls	do_csum_remainder	# 4-byte aligned remainder

	add	-32,d2			# bias count so the loop's sub sets carry on exit
	mov	+32,d3			# chunk size

do_csum_loop:
	# eight 32-bit loads per iteration; the addc chain keeps folding the
	# carry so the order of these instructions must not change
	mov	(a0+),d0
	add	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	mov	(a0+),d0
	addc	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	addc	+0,d1			# absorb the final carry of the chain

	sub	d3,d2
	bcc	do_csum_loop		# keep looping while >= 32 bytes remained

	add	d3,d2			# undo the -32 bias: d2 = true bytes left (0..31)
	beq	do_csum_done

do_csum_remainder:
	# cut 16-31 bytes down to 0-15
	cmp	+16,d2
	bcs	do_csum_fewer_than_16
	mov	(a0+),d0
	add	d0,d1
	mov	(a0+),e0
	addc	e0,d1
	mov	(a0+),e1
	addc	e1,d1
	mov	(a0+),e3
	addc	e3,d1
	addc	+0,d1
	add	-16,d2
	beq	do_csum_done

do_csum_fewer_than_16:
	# checksum the remaining whole words (0-3 of them), falling through
	# the ladder so each case sums exactly the right number of words
	cmp	+4,d2
	bcs	do_csum_fewer_than_4
	cmp	+8,d2
	bcs	do_csum_one_word
	cmp	+12,d2
	bcs	do_csum_two_words
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_two_words:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1
do_csum_one_word:
	mov	(a0+),d0
	add	d0,d1
	addc	+0,d1

do_csum_fewer_than_4:
	# 0-3 trailing bytes: a possible halfword then a possible final byte
	and	+3,d2
	beq	do_csum_done
	# NOTE(review): xor_cmp appears to zero d0 and compare d2 with +2 in
	# one combined MN10300 operation — d0 must be 0 here in case no
	# halfword is loaded; confirm against the ISA manual
	xor_cmp	d0,d0,+2,d2
	bcs	do_csum_fewer_than_2
	movhu	(a0+),d0
do_csum_fewer_than_2:
	and	+1,d2
	beq	do_csum_add_last_bit
	movbu	(a0),d3			# final odd byte goes in the low lane
	add	d3,d0
do_csum_add_last_bit:
	add	d0,d1
	addc	+0,d1

do_csum_done:
	# compress the checksum down to 16 bits:
	# d0 = (sum >> 16) + (sum << 16), then the carry is folded by the
	# addc and the result taken from the top half
	mov	+0xffff0000,d2
	and	d1,d2			# d2 = high halfword of sum, in place
	asl	+16,d1			# d1 = low halfword of sum, shifted up
	add	d2,d1,d0
	addc	+0xffff,d0		# fold final carry; +0xffff keeps the top
					# half correct through the lsr below
	lsr	+16,d0

	# flip the halves of the word result if the buffer was oddly aligned
	mov	(12,sp),d1		# reload original buff pointer
	and	+1,d1
	beq	do_csum_not_oddly_aligned
	swaph	d0,d0			# exchange bits 15:8 with 7:0

do_csum_not_oddly_aligned:
	ret	[d2,d3],8		# restore d2,d3 and pop 8 bytes of frame

do_csum_end:
	.size	do_csum, do_csum_end-do_csum