/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Quick'n'dirty IP checksum ...
 *
 * Copyright (C) 1998, 1999 Ralf Baechle
 * Copyright (C) 1999 Silicon Graphics, Inc.
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2014 Imagination Technologies Ltd.
 */
#include <linux/errno.h>
#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>

#ifdef CONFIG_64BIT
/*
 * As we are sharing the code base with the mips32 tree (which uses the o32
 * ABI register definitions), we need to redefine the register definitions
 * from the n64 ABI register naming to the o32 ABI register naming.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0	$8
#define t1	$9
#define t2	$10
#define t3	$11
#define t4	$12
#define t5	$13
#define t6	$14
#define t7	$15

#define USE_DOUBLE
#endif

#ifdef USE_DOUBLE

#define LOAD	ld
#define LOAD32	lwu
#define ADD	daddu
#define NBYTES	8

#else

#define LOAD	lw
#define LOAD32	lw
#define ADD	addu
#define NBYTES	4

#endif /* USE_DOUBLE */

#define UNIT(unit)  ((unit)*NBYTES)

#define ADDC(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	ADD	sum, reg;					\
	sltu	v1, sum, reg;					\
	ADD	sum, v1;					\
	.set	pop

#define ADDC32(sum,reg)						\
	.set	push;						\
	.set	noat;						\
	addu	sum, reg;					\
	sltu	v1, sum, reg;					\
	addu	sum, v1;					\
	.set	pop

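/*
 * ADDC() is a ones'-complement add: the carry out of the addition is
 * folded back into the running sum.  Roughly, in C terms:
 *
 *	sum += reg;
 *	if (sum < reg)		(unsigned wrap-around means a carry out)
 *		sum += 1;
 *
 * ADDC32() is the same operation restricted to 32-bit adds, for adding a
 * 32-bit quantity into a sum otherwise kept in a 64-bit register.
 */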
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
	LOAD	_t0, (offset + UNIT(0))(src);			\
	LOAD	_t1, (offset + UNIT(1))(src);			\
	LOAD	_t2, (offset + UNIT(2))(src);			\
	LOAD	_t3, (offset + UNIT(3))(src);			\
	ADDC(_t0, _t1);						\
	ADDC(_t2, _t3);						\
	ADDC(sum, _t0);						\
	ADDC(sum, _t2)

#ifdef USE_DOUBLE
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)
#else
#define CSUM_BIGCHUNK(src, offset, sum, _t0, _t1, _t2, _t3)	\
	CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3);	\
	CSUM_BIGCHUNK1(src, offset + 0x10, sum, _t0, _t1, _t2, _t3)
#endif

/*
 * a0: source address
 * a1: length of the area to checksum
 * a2: partial checksum
 */

#define src a0
#define sum v0

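/*
 * At the C level this implements (as declared in <asm/checksum.h>):
 *
 *	__wsum csum_partial(const void *buff, int len, __wsum sum);
 *
 * with buff/len/sum arriving in a0/a1/a2 and the updated 32-bit partial
 * checksum returned in v0.  The result is neither folded to 16 bits nor
 * complemented; callers do that themselves.
 */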
	.text
	.set	noreorder
	.align	5
LEAF(csum_partial)
EXPORT_SYMBOL(csum_partial)
	move	sum, zero
	move	t7, zero

	sltiu	t8, a1, 0x8
	bnez	t8, .Lsmall_csumcpy		/* < 8 bytes to copy */
	 move	t2, a1

	andi	t7, src, 0x1			/* odd buffer? */

.Lhword_align:
	beqz	t7, .Lword_align
	 andi	t8, src, 0x2

	lbu	t0, (src)
	LONG_SUBU	a1, a1, 0x1
#ifdef __MIPSEL__
	sll	t0, t0, 8
#endif
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x1
	andi	t8, src, 0x2

.Lword_align:
	beqz	t8, .Ldword_align
	 sltiu	t8, a1, 56

	lhu	t0, (src)
	LONG_SUBU	a1, a1, 0x2
	ADDC(sum, t0)
	sltiu	t8, a1, 56
	PTR_ADDU	src, src, 0x2

.Ldword_align:
	bnez	t8, .Ldo_end_words
	 move	t8, a1

	andi	t8, src, 0x4
	beqz	t8, .Lqword_align
	 andi	t8, src, 0x8

	LOAD32	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x4
	ADDC(sum, t0)
	PTR_ADDU	src, src, 0x4
	andi	t8, src, 0x8

.Lqword_align:
	beqz	t8, .Loword_align
	 andi	t8, src, 0x10

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
#else
	lw	t0, 0x00(src)
	lw	t1, 0x04(src)
	LONG_SUBU	a1, a1, 0x8
	ADDC(sum, t0)
	ADDC(sum, t1)
#endif
	PTR_ADDU	src, src, 0x8
	andi	t8, src, 0x10

.Loword_align:
	beqz	t8, .Lbegin_movement
	 LONG_SRL	t8, a1, 0x7

#ifdef USE_DOUBLE
	ld	t0, 0x00(src)
	ld	t1, 0x08(src)
	ADDC(sum, t0)
	ADDC(sum, t1)
#else
	CSUM_BIGCHUNK1(src, 0x00, sum, t0, t1, t3, t4)
#endif
	LONG_SUBU	a1, a1, 0x10
	PTR_ADDU	src, src, 0x10
	LONG_SRL	t8, a1, 0x7

.Lbegin_movement:
	beqz	t8, 1f
	 andi	t2, a1, 0x40

.Lmove_128bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
	LONG_SUBU	t8, t8, 0x01
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x80
	bnez	t8, .Lmove_128bytes
	.set	noreorder

1:
	beqz	t2, 1f
	 andi	t2, a1, 0x20

.Lmove_64bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
	PTR_ADDU	src, src, 0x40

1:
	beqz	t2, .Ldo_end_words
	 andi	t8, a1, 0x1c

.Lmove_32bytes:
	CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
	andi	t8, a1, 0x1c
	PTR_ADDU	src, src, 0x20

.Ldo_end_words:
	beqz	t8, .Lsmall_csumcpy
	 andi	t2, a1, 0x3
	LONG_SRL	t8, t8, 0x2

.Lend_words:
	LOAD32	t0, (src)
	LONG_SUBU	t8, t8, 0x1
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	PTR_ADDU	src, src, 0x4
	bnez	t8, .Lend_words
	.set	noreorder

/* unknown src alignment and < 8 bytes to go */
.Lsmall_csumcpy:
	move	a1, t2

	andi	t0, a1, 4
	beqz	t0, 1f
	 andi	t0, a1, 2

	/* Still a full word to go */
	ulw	t1, (src)
	PTR_ADDIU	src, 4
#ifdef USE_DOUBLE
	dsll	t1, t1, 32			/* clear lower 32bit */
#endif
	ADDC(sum, t1)

1:	move	t1, zero
	beqz	t0, 1f
	 andi	t0, a1, 1

	/* Still a halfword to go */
	ulhu	t1, (src)
	PTR_ADDIU	src, 2

1:	beqz	t0, 1f
	 sll	t1, t1, 16

	lbu	t2, (src)
	 nop

#ifdef __MIPSEB__
	sll	t2, t2, 8
#endif
	or	t1, t2

1:	ADDC(sum, t1)

	/* fold checksum */
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif
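	/*
	 * The USE_DOUBLE fold above adds the upper and lower 32-bit halves
	 * of the 64-bit accumulator and wraps the carry back in, roughly
	 * sum = low32(sum) + high32(sum) (+ carry), so the result fits the
	 * 32-bit return value.
	 */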

	/* odd buffer alignment? */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, t7
	.set	pop
#else
	beqz	t7, 1f				/* odd buffer alignment? */
	 lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
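	/*
	 * If the buffer started at an odd address, every 16-bit word was
	 * summed with its bytes swapped, so byte-swapping the final sum
	 * (wsbh on R2+ cores, the shift/mask sequence otherwise) restores
	 * the correct ones'-complement result; t7 holds the "odd" flag.
	 */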
	.set	reorder
	/* Add the passed partial csum. */
	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)


/*
 * checksum and copy routines based on memcpy.S
 *
 *	csum_partial_copy_nocheck(src, dst, len)
 *	__csum_partial_copy_kernel(src, dst, len)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */

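/*
 * At the C level the generated entry points look like:
 *
 *	__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
 *
 * src/dst/len arrive in a0/a1/a2 and the 32-bit partial checksum of the
 * copied data is returned in v0.
 */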
#define src a0
#define dst a1
#define len a2
#define sum v0
#define odd t8

/*
 * All exception handlers simply return 0.
 */

/* Instruction type */
#define LD_INSN 1
#define ST_INSN 2
#define LEGACY_MODE 1
#define EVA_MODE    2
#define USEROP   1
#define KERNELOP 2

/*
 * Wrapper to add an entry in the exception table
 * in case the insn causes a memory exception.
 * Arguments:
 *	insn : Load/store instruction
 *	type : Instruction type
 *	reg  : Register
 *	addr : Address
 */
#define EXC(insn, type, reg, addr)			\
	.if \mode == LEGACY_MODE;			\
9:		insn reg, addr;				\
		.section __ex_table,"a";		\
		PTR_WD	9b, .L_exc;			\
		.previous;				\
	/* This is enabled in EVA mode */		\
	.else;						\
		/* If loading from user or storing to user */	\
		.if ((\from == USEROP) && (type == LD_INSN)) || \
		    ((\to == USEROP) && (type == ST_INSN));	\
9:			__BUILD_EVA_INSN(insn##e, reg, addr);	\
			.section __ex_table,"a";	\
			PTR_WD	9b, .L_exc;		\
			.previous;			\
		.else;					\
			/* EVA without exception */	\
			insn reg, addr;			\
		.endif;					\
	.endif

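/*
 * In other words: every EXC()-wrapped load/store gets a local "9:" label
 * and an __ex_table entry pairing that instruction with .L_exc, so a
 * faulting access lands in .L_exc (defined below), which simply returns 0
 * from the whole routine.
 */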
#undef LOAD

#ifdef USE_DOUBLE

#define LOADK	ld /* No exception */
#define LOAD(reg, addr)		EXC(ld, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(ldl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(ldr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(sdl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(sdr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sd, ST_INSN, reg, addr)
#define ADD    daddu
#define SUB    dsubu
#define SRL    dsrl
#define SLL    dsll
#define SLLV   dsllv
#define SRLV   dsrlv
#define NBYTES 8
#define LOG_NBYTES 3

#else

#define LOADK	lw /* No exception */
#define LOAD(reg, addr)		EXC(lw, LD_INSN, reg, addr)
#define LOADBU(reg, addr)	EXC(lbu, LD_INSN, reg, addr)
#define LOADL(reg, addr)	EXC(lwl, LD_INSN, reg, addr)
#define LOADR(reg, addr)	EXC(lwr, LD_INSN, reg, addr)
#define STOREB(reg, addr)	EXC(sb, ST_INSN, reg, addr)
#define STOREL(reg, addr)	EXC(swl, ST_INSN, reg, addr)
#define STORER(reg, addr)	EXC(swr, ST_INSN, reg, addr)
#define STORE(reg, addr)	EXC(sw, ST_INSN, reg, addr)
#define ADD    addu
#define SUB    subu
#define SRL    srl
#define SLL    sll
#define SLLV   sllv
#define SRLV   srlv
#define NBYTES 4
#define LOG_NBYTES 2

#endif /* USE_DOUBLE */

#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define LDFIRST LOADR
#define LDREST	LOADL
#define STFIRST STORER
#define STREST	STOREL
#define SHIFT_DISCARD SLLV
#define SHIFT_DISCARD_REVERT SRLV
#else
#define LDFIRST LOADL
#define LDREST	LOADR
#define STFIRST STOREL
#define STREST	STORER
#define SHIFT_DISCARD SRLV
#define SHIFT_DISCARD_REVERT SLLV
#endif

#define FIRST(unit) ((unit)*NBYTES)
#define REST(unit)  (FIRST(unit)+NBYTES-1)

#define ADDRMASK (NBYTES-1)

#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	.set	noat
#else
	.set	at=v1
#endif

	.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to

	li	sum, -1
	move	odd, zero
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	sltu	t2, len, NBYTES
	and	t1, dst, ADDRMASK
	bnez	t2, .Lcopy_bytes_checklen\@
	 and	t0, src, ADDRMASK
	andi	odd, dst, 0x1			/* odd buffer? */
	bnez	t1, .Ldst_unaligned\@
	 nop
	bnez	t0, .Lsrc_unaligned_dst_aligned\@
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 */
.Lboth_aligned\@:
	 SRL	t0, len, LOG_NBYTES+3		# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@	# len < 8*NBYTES
	 nop
	SUB	len, 8*NBYTES			# subtract here for bgez loop
	.align	4
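	/*
	 * Main loop: 8*NBYTES bytes per iteration.  Loads, stores and
	 * checksum adds are interleaved so wide-issue cores can overlap
	 * them, and values are pre-added in pairs (ADDC(t0, t1), ...)
	 * before being folded into sum, shortening the dependency chain
	 * on the running checksum.
	 */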
1:
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	LOAD(t4, UNIT(4)(src))
	LOAD(t5, UNIT(5)(src))
	LOAD(t6, UNIT(6)(src))
	LOAD(t7, UNIT(7)(src))
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	STORE(t4, UNIT(4)(dst))
	ADDC(t4, t5)
	STORE(t5, UNIT(5)(dst))
	ADDC(sum, t4)
	STORE(t6, UNIT(6)(dst))
	ADDC(t6, t7)
	STORE(t7, UNIT(7)(dst))
	ADDC(sum, t6)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 8*NBYTES
	bgez	len, 1b
	.set	noreorder
	ADD	len, 8*NBYTES			# revert len (see above)

	/*
	 * len == the number of bytes left to copy < 8*NBYTES
	 */
.Lcleanup_both_aligned\@:
#define rem t7
	beqz	len, .Ldone\@
	 sltu	t0, len, 4*NBYTES
	bnez	t0, .Lless_than_4units\@
	 and	rem, len, (NBYTES-1)		# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 */
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LOAD(t0, 0(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, .Ldone\@
	 ADD	t1, dst, len		# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3		# rem = number of bits to keep
	LOAD(t0, 0(src))
	SUB	bits, bits, rem		# bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1))
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set	reorder
	ADDC(sum, t0)
	b	.Ldone\@
	.set	noreorder
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src))
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src))
	SUB	t2, t2, t1		# t2 = number of bytes copied
	xor	match, t0, t1
	STFIRST(t3, FIRST(0)(dst))
	SLL	t4, t1, 3		# t4 = number of bits to discard
	SHIFT_DISCARD t3, t3, t4
	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
	ADDC(sum, t3)
	beq	len, t2, .Ldone\@
	 SUB	len, len, t2
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	 ADD	src, src, t2

.Lsrc_unaligned_dst_aligned\@:
	SRL	t0, len, LOG_NBYTES+2		# +2 for 4 units/iter
	beqz	t0, .Lcleanup_src_unaligned\@
	 and	rem, len, (4*NBYTES-1)		# rem = len % 4*NBYTES
1:
/*
 * Avoid consecutive LD*'s to the same register since some mips
 * implementations can't issue them in the same cycle.
 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
 * are to the same unit (unless src is aligned, but it's not).
 */
	LDFIRST(t0, FIRST(0)(src))
	LDFIRST(t1, FIRST(1)(src))
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src))
	LDREST(t1, REST(1)(src))
	LDFIRST(t2, FIRST(2)(src))
	LDFIRST(t3, FIRST(3)(src))
	LDREST(t2, REST(2)(src))
	LDREST(t3, REST(3)(src))
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop					# improves slotting
#endif
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcleanup_src_unaligned\@:
	beqz	len, .Ldone\@
	 and	rem, len, NBYTES-1		# rem = len % NBYTES
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LDFIRST(t0, FIRST(0)(src))
	LDREST(t0, REST(0)(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	len, rem, 1b
	.set	noreorder

.Lcopy_bytes_checklen\@:
	beqz	len, .Ldone\@
	 nop
.Lcopy_bytes\@:
	/* 0 < len < NBYTES */
#ifdef CONFIG_CPU_LITTLE_ENDIAN
#define SHIFT_START 0
#define SHIFT_INC 8
#else
#define SHIFT_START 8*(NBYTES-1)
#define SHIFT_INC -8
#endif
	move	t2, zero			# partial word
	li	t3, SHIFT_START			# shift
#define COPY_BYTE(N)			\
	LOADBU(t0, N(src));		\
	SUB	len, len, 1;		\
	STOREB(t0, N(dst));		\
	SLLV	t0, t0, t3;		\
	addu	t3, SHIFT_INC;		\
	beqz	len, .Lcopy_bytes_done\@; \
	 or	t2, t0

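	/*
	 * Each COPY_BYTE(N) copies byte N and shifts it into position in
	 * t2, rebuilding the bytes as a partial word in memory order; t2
	 * is added into the checksum at .Lcopy_bytes_done.  The unrolled
	 * COPY_BYTE()s below handle all but the last possible byte, which
	 * needs no branch on len.
	 */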
	COPY_BYTE(0)
	COPY_BYTE(1)
#ifdef USE_DOUBLE
	COPY_BYTE(2)
	COPY_BYTE(3)
	COPY_BYTE(4)
	COPY_BYTE(5)
#endif
	LOADBU(t0, NBYTES-2(src))
	SUB	len, len, 1
	STOREB(t0, NBYTES-2(dst))
	SLLV	t0, t0, t3
	or	t2, t0
.Lcopy_bytes_done\@:
	ADDC(sum, t2)
.Ldone\@:
	/* fold checksum */
	.set	push
	.set	noat
#ifdef USE_DOUBLE
	dsll32	v1, sum, 0
	daddu	sum, v1
	sltu	v1, sum, v1
	dsra32	sum, sum, 0
	addu	sum, v1
#endif

#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum
	movn	sum, v1, odd
	.set	pop
#else
	beqz	odd, 1f				/* odd buffer alignment? */
	 lui	v1, 0x00ff
	addu	v1, 0x00ff
	and	t0, sum, v1
	sll	t0, t0, 8
	srl	sum, sum, 8
	and	sum, sum, v1
	or	sum, sum, t0
1:
#endif
	.set	pop
	.set	reorder
	jr	ra
	.set	noreorder
	.endm

	.set	noreorder
.L_exc:
	jr	ra
	 li	v0, 0

FEXPORT(__csum_partial_copy_nocheck)
EXPORT_SYMBOL(__csum_partial_copy_nocheck)
#ifndef CONFIG_EVA
FEXPORT(__csum_partial_copy_to_user)
EXPORT_SYMBOL(__csum_partial_copy_to_user)
FEXPORT(__csum_partial_copy_from_user)
EXPORT_SYMBOL(__csum_partial_copy_from_user)
#endif
__BUILD_CSUM_PARTIAL_COPY_USER LEGACY_MODE USEROP USEROP

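/*
 * With EVA the user-facing variants are generated separately below so
 * that accesses on the user side of the copy use the EVA instruction
 * forms (via __BUILD_EVA_INSN) while the kernel side uses ordinary
 * loads/stores.
 */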
#ifdef CONFIG_EVA
LEAF(__csum_partial_copy_to_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE KERNELOP USEROP
END(__csum_partial_copy_to_user)

LEAF(__csum_partial_copy_from_user)
__BUILD_CSUM_PARTIAL_COPY_USER EVA_MODE USEROP KERNELOP
END(__csum_partial_copy_from_user)
#endif