/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
10 #include <asm/assembler.h>
/*
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 * r0 = src, r1 = dst, r2 = len, r3 = sum
 * Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
/*
 * Align an unaligned destination pointer. We know that
 * we have >= 8 bytes here, so we don't need to check
 * the length. Note that the source pointer hasn't been
 * aligned yet.
 */
41 adcs sum, sum, ip, put_byte_1 @ update checksum
44 reteq lr @ dst is now 32bit aligned
46 .Ldst_16bit: load2b r8, ip
48 adcs sum, sum, r8, put_byte_0
50 adcs sum, sum, ip, put_byte_1
52 ret lr @ dst is now 32bit aligned
/*
 * Handle 0 to 7 bytes, with any alignment of source and
 * destination pointers. Note that when we get here, C = 0.
 */
58 .Lless8: teq len, #0 @ check for zero count
61 /* we must have at least one byte. */
62 tst dst, #1 @ dst 16-bit aligned
68 adcs sum, sum, ip, put_byte_1 @ update checksum
75 adcs sum, sum, r8, put_byte_0
77 adcs sum, sum, ip, put_byte_1
86 adcs sum, sum, r8, put_byte_0 @ update checksum
93 cmp len, #8 @ Ensure that we have at least
94 blo .Lless8 @ 8 bytes to copy.
96 adds sum, sum, #0 @ C = 0
97 tst dst, #3 @ Test destination alignment
98 blne .Ldst_unaligned @ align destination, return here
/*
 * Ok, the dst pointer is now 32bit aligned, and we know
 * that we must have more than 4 bytes to copy. Note
 * that C contains the carry from the dst alignment above.
 */
106 tst src, #3 @ Test source alignment
107 bne .Lsrc_not_aligned
109 /* Routine for src & dst aligned */
114 1: load4l r4, r5, r6, r7
115 stmia dst!, {r4, r5, r6, r7}
143 mov r5, r4, get_byte_0
145 adcs sum, sum, r4, lspush #16
147 mov r5, r4, get_byte_1
149 mov r5, r4, get_byte_2
153 adcnes sum, sum, r5, put_byte_0
/*
 * If the dst pointer was not 16-bit aligned, we
 * need to rotate the checksum here to get around
 * the inefficient byte manipulations in the
 * architecture independent code.
 */
161 .Ldone: adc r0, sum, #0
162 ldr sum, [sp, #0] @ dst
168 adc sum, sum, #0 @ include C from dst alignment
175 mov r4, r5, lspull #8 @ C = 0
178 1: load4l r5, r6, r7, r8
179 orr r4, r4, r5, lspush #24
180 mov r5, r5, lspull #8
181 orr r5, r5, r6, lspush #24
182 mov r6, r6, lspull #8
183 orr r6, r6, r7, lspush #24
184 mov r7, r7, lspull #8
185 orr r7, r7, r8, lspush #24
186 stmia dst!, {r4, r5, r6, r7}
191 mov r4, r8, lspull #8
200 orr r4, r4, r5, lspush #24
201 mov r5, r5, lspull #8
202 orr r5, r5, r6, lspush #24
206 mov r4, r6, lspull #8
210 orr r4, r4, r5, lspush #24
213 mov r4, r5, lspull #8
216 mov r5, r4, get_byte_0
219 adcs sum, sum, r4, lspush #16
221 mov r5, r4, get_byte_1
223 mov r5, r4, get_byte_2
226 .Lsrc2_aligned: mov r4, r5, lspull #16
230 1: load4l r5, r6, r7, r8
231 orr r4, r4, r5, lspush #16
232 mov r5, r5, lspull #16
233 orr r5, r5, r6, lspush #16
234 mov r6, r6, lspull #16
235 orr r6, r6, r7, lspush #16
236 mov r7, r7, lspull #16
237 orr r7, r7, r8, lspush #16
238 stmia dst!, {r4, r5, r6, r7}
243 mov r4, r8, lspull #16
252 orr r4, r4, r5, lspush #16
253 mov r5, r5, lspull #16
254 orr r5, r5, r6, lspush #16
258 mov r4, r6, lspull #16
262 orr r4, r4, r5, lspush #16
265 mov r4, r5, lspull #16
268 mov r5, r4, get_byte_0
273 mov r5, r4, get_byte_1
280 .Lsrc3_aligned: mov r4, r5, lspull #24
284 1: load4l r5, r6, r7, r8
285 orr r4, r4, r5, lspush #8
286 mov r5, r5, lspull #24
287 orr r5, r5, r6, lspush #8
288 mov r6, r6, lspull #24
289 orr r6, r6, r7, lspush #8
290 mov r7, r7, lspull #24
291 orr r7, r7, r8, lspush #8
292 stmia dst!, {r4, r5, r6, r7}
297 mov r4, r8, lspull #24
306 orr r4, r4, r5, lspush #8
307 mov r5, r5, lspull #24
308 orr r5, r5, r6, lspush #8
312 mov r4, r6, lspull #24
316 orr r4, r4, r5, lspush #8
319 mov r4, r5, lspull #24
322 mov r5, r4, get_byte_0
328 mov r5, r4, get_byte_0
330 adcs sum, sum, r4, lspush #24
331 mov r5, r4, get_byte_1