Merge git://git.infradead.org/users/willy/linux-nvme
[linux-2.6-block.git] / arch / c6x / lib / csum_64plus.S
CommitLineData
09831ca7
AJ
1;
2; linux/arch/c6x/lib/csum_64plus.s
3;
4; Port on Texas Instruments TMS320C6x architecture
5;
6; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated
7; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com)
8;
9; This program is free software; you can redistribute it and/or modify
10; it under the terms of the GNU General Public License version 2 as
11; published by the Free Software Foundation.
12;
13#include <linux/linkage.h>
14
15;
16;unsigned int csum_partial_copy(const char *src, char * dst,
17; int len, int sum)
18;
19; A4: src
20; B4: dst
21; A6: len
22; B6: sum
23; return csum in A4
24;
25
26 .text
27ENTRY(csum_partial_copy)
28 MVC .S2 ILC,B30
29
30 MV .D1X B6,A31 ; given csum
31 ZERO .D1 A9 ; csum (a side)
32|| ZERO .D2 B9 ; csum (b side)
33|| SHRU .S2X A6,2,B5 ; len / 4
34
35 ;; Check alignment and size
36 AND .S1 3,A4,A1
37|| AND .S2 3,B4,B0
38 OR .L2X B0,A1,B0 ; non aligned condition
39|| MVC .S2 B5,ILC
40|| MVK .D2 1,B2
41|| MV .D1X B5,A1 ; words condition
42 [!A1] B .S1 L8
43 [B0] BNOP .S1 L6,5
44
45 SPLOOP 1
46
47 ;; Main loop for aligned words
48 LDW .D1T1 *A4++,A7
49 NOP 4
50 MV .S2X A7,B7
51|| EXTU .S1 A7,0,16,A16
52 STW .D2T2 B7,*B4++
53|| MPYU .M2 B7,B2,B8
54|| ADD .L1 A16,A9,A9
55 NOP
56 SPKERNEL 8,0
57|| ADD .L2 B8,B9,B9
58
59 ZERO .D1 A1
60|| ADD .L1X A9,B9,A9 ; add csum from a and b sides
61
62L6:
63 [!A1] BNOP .S1 L8,5
64
65 ;; Main loop for non-aligned words
66 SPLOOP 2
67 || MVK .L1 1,A2
68
69 LDNW .D1T1 *A4++,A7
70 NOP 3
71
72 NOP
73 MV .S2X A7,B7
74 || EXTU .S1 A7,0,16,A16
75 || MPYU .M1 A7,A2,A8
76
77 ADD .L1 A16,A9,A9
78 SPKERNEL 6,0
79 || STNW .D2T2 B7,*B4++
80 || ADD .L1 A8,A9,A9
81
82L8: AND .S2X 2,A6,B5
83 CMPGT .L2 B5,0,B0
84 [!B0] BNOP .S1 L82,4
85
86 ;; Manage half-word
87 ZERO .L1 A7
88|| ZERO .D1 A8
89
90#ifdef CONFIG_CPU_BIG_ENDIAN
91
92 LDBU .D1T1 *A4++,A7
93 LDBU .D1T1 *A4++,A8
94 NOP 3
95 SHL .S1 A7,8,A0
96 ADD .S1 A8,A9,A9
97 STB .D2T1 A7,*B4++
98|| ADD .S1 A0,A9,A9
99 STB .D2T1 A8,*B4++
100
101#else
102
103 LDBU .D1T1 *A4++,A7
104 LDBU .D1T1 *A4++,A8
105 NOP 3
106 ADD .S1 A7,A9,A9
107 SHL .S1 A8,8,A0
108
109 STB .D2T1 A7,*B4++
110|| ADD .S1 A0,A9,A9
111 STB .D2T1 A8,*B4++
112
113#endif
114
115 ;; Manage eventually the last byte
116L82: AND .S2X 1,A6,B0
117 [!B0] BNOP .S1 L9,5
118
119|| ZERO .L1 A7
120
121L83: LDBU .D1T1 *A4++,A7
122 NOP 4
123
124 MV .L2X A7,B7
125
126#ifdef CONFIG_CPU_BIG_ENDIAN
127
128 STB .D2T2 B7,*B4++
129|| SHL .S1 A7,8,A7
130 ADD .S1 A7,A9,A9
131
132#else
133
134 STB .D2T2 B7,*B4++
135|| ADD .S1 A7,A9,A9
136
137#endif
138
139 ;; Fold the csum
140L9: SHRU .S2X A9,16,B0
141 [!B0] BNOP .S1 L10,5
142
143L91: SHRU .S2X A9,16,B4
144|| EXTU .S1 A9,16,16,A3
145 ADD .D1X A3,B4,A9
146
147 SHRU .S1 A9,16,A0
148 [A0] BNOP .S1 L91,5
149
150L10: ADD .D1 A31,A9,A9
151 MV .D1 A9,A4
152
153 BNOP .S2 B3,4
154 MVC .S2 B30,ILC
155ENDPROC(csum_partial_copy)
156
157;
158;unsigned short
159;ip_fast_csum(unsigned char *iph, unsigned int ihl)
160;{
161; unsigned int checksum = 0;
162; unsigned short *tosum = (unsigned short *) iph;
163; int len;
164;
165; len = ihl*4;
166;
167; if (len <= 0)
168; return 0;
169;
170; while(len) {
171; len -= 2;
172; checksum += *tosum++;
173; }
174; if (len & 1)
175; checksum += *(unsigned char*) tosum;
176;
177; while(checksum >> 16)
178; checksum = (checksum & 0xffff) + (checksum >> 16);
179;
180; return ~checksum;
181;}
182;
183; A4: iph
184; B4: ihl
185; return checksum in A4
186;
187 .text
188
189ENTRY(ip_fast_csum)
190 ZERO .D1 A5
191 || MVC .S2 ILC,B30
192 SHL .S2 B4,2,B0
193 CMPGT .L2 B0,0,B1
194 [!B1] BNOP .S1 L15,4
195 [!B1] ZERO .D1 A3
196
197 [!B0] B .S1 L12
198 SHRU .S2 B0,1,B0
199 MVC .S2 B0,ILC
200 NOP 3
201
202 SPLOOP 1
203 LDHU .D1T1 *A4++,A3
204 NOP 3
205 NOP
206 SPKERNEL 5,0
207 || ADD .L1 A3,A5,A5
208
209L12: SHRU .S1 A5,16,A0
210 [!A0] BNOP .S1 L14,5
211
212L13: SHRU .S2X A5,16,B4
213 EXTU .S1 A5,16,16,A3
214 ADD .D1X A3,B4,A5
215 SHRU .S1 A5,16,A0
216 [A0] BNOP .S1 L13,5
217
218L14: NOT .D1 A5,A3
219 EXTU .S1 A3,16,16,A3
220
221L15: BNOP .S2 B3,3
222 MVC .S2 B30,ILC
223 MV .D1 A3,A4
224ENDPROC(ip_fast_csum)
225
226;
227;unsigned short
228;do_csum(unsigned char *buff, unsigned int len)
229;{
230; int odd, count;
231; unsigned int result = 0;
232;
233; if (len <= 0)
234; goto out;
235; odd = 1 & (unsigned long) buff;
236; if (odd) {
237;#ifdef __LITTLE_ENDIAN
238; result += (*buff << 8);
239;#else
240; result = *buff;
241;#endif
242; len--;
243; buff++;
244; }
245; count = len >> 1; /* nr of 16-bit words.. */
246; if (count) {
247; if (2 & (unsigned long) buff) {
248; result += *(unsigned short *) buff;
249; count--;
250; len -= 2;
251; buff += 2;
252; }
253; count >>= 1; /* nr of 32-bit words.. */
254; if (count) {
255; unsigned int carry = 0;
256; do {
257; unsigned int w = *(unsigned int *) buff;
258; count--;
259; buff += 4;
260; result += carry;
261; result += w;
262; carry = (w > result);
263; } while (count);
264; result += carry;
265; result = (result & 0xffff) + (result >> 16);
266; }
267; if (len & 2) {
268; result += *(unsigned short *) buff;
269; buff += 2;
270; }
271; }
272; if (len & 1)
273;#ifdef __LITTLE_ENDIAN
274; result += *buff;
275;#else
276; result += (*buff << 8);
277;#endif
278; result = (result & 0xffff) + (result >> 16);
279; /* add up carry.. */
280; result = (result & 0xffff) + (result >> 16);
281; if (odd)
282; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
283;out:
284; return result;
285;}
286;
287; A4: buff
288; B4: len
289; return checksum in A4
290;
291
292ENTRY(do_csum)
293 CMPGT .L2 B4,0,B0
294 [!B0] BNOP .S1 L26,3
295 EXTU .S1 A4,31,31,A0
296
297 MV .L1 A0,A3
298|| MV .S1X B3,A5
299|| MV .L2 B4,B3
300|| ZERO .D1 A1
301
302#ifdef CONFIG_CPU_BIG_ENDIAN
303 [A0] SUB .L2 B3,1,B3
304|| [A0] LDBU .D1T1 *A4++,A1
305#else
306 [!A0] BNOP .S1 L21,5
307|| [A0] LDBU .D1T1 *A4++,A0
308 SUB .L2 B3,1,B3
309|| SHL .S1 A0,8,A1
310L21:
311#endif
312 SHR .S2 B3,1,B0
313 [!B0] BNOP .S1 L24,3
314 MVK .L1 2,A0
315 AND .L1 A4,A0,A0
316
317 [!A0] BNOP .S1 L22,5
318|| [A0] LDHU .D1T1 *A4++,A0
319 SUB .L2 B0,1,B0
320|| SUB .S2 B3,2,B3
321|| ADD .L1 A0,A1,A1
322L22:
323 SHR .S2 B0,1,B0
324|| ZERO .L1 A0
325
326 [!B0] BNOP .S1 L23,5
327|| [B0] MVC .S2 B0,ILC
328
329 SPLOOP 3
330 SPMASK L1
331|| MV .L1 A1,A2
332|| LDW .D1T1 *A4++,A1
333
334 NOP 4
335 ADD .L1 A0,A1,A0
336 ADD .L1 A2,A0,A2
337
338 SPKERNEL 1,2
339|| CMPGTU .L1 A1,A2,A0
340
341 ADD .L1 A0,A2,A6
342 EXTU .S1 A6,16,16,A7
343 SHRU .S2X A6,16,B0
344 NOP 1
345 ADD .L1X A7,B0,A1
346L23:
347 MVK .L2 2,B0
348 AND .L2 B3,B0,B0
349 [B0] LDHU .D1T1 *A4++,A0
350 NOP 4
351 [B0] ADD .L1 A0,A1,A1
352L24:
353 EXTU .S2 B3,31,31,B0
354#ifdef CONFIG_CPU_BIG_ENDIAN
355 [!B0] BNOP .S1 L25,4
356|| [B0] LDBU .D1T1 *A4,A0
357 SHL .S1 A0,8,A0
358 ADD .L1 A0,A1,A1
359L25:
360#else
361 [B0] LDBU .D1T1 *A4,A0
362 NOP 4
363 [B0] ADD .L1 A0,A1,A1
364#endif
365 EXTU .S1 A1,16,16,A0
366 SHRU .S2X A1,16,B0
367 NOP 1
368 ADD .L1X A0,B0,A0
369 SHRU .S1 A0,16,A1
370 ADD .L1 A0,A1,A0
371 EXTU .S1 A0,16,16,A1
372 EXTU .S1 A1,16,24,A2
373
374 EXTU .S1 A1,24,16,A0
375|| MV .L2X A3,B0
376
377 [B0] OR .L1 A0,A2,A1
378L26:
379 NOP 1
380 BNOP .S2X A5,4
381 MV .L1 A1,A4
382ENDPROC(do_csum)
383
384;__wsum csum_partial(const void *buff, int len, __wsum wsum)
385;{
386; unsigned int sum = (__force unsigned int)wsum;
387; unsigned int result = do_csum(buff, len);
388;
389; /* add in old sum, and carry.. */
390; result += sum;
391; if (sum > result)
392; result += 1;
393; return (__force __wsum)result;
394;}
395;
396ENTRY(csum_partial)
397 MV .L1X B3,A9
398|| CALLP .S2 do_csum,B3
399|| MV .S1 A6,A8
400 BNOP .S2X A9,2
401 ADD .L1 A8,A4,A1
402 CMPGTU .L1 A8,A1,A0
403 ADD .L1 A1,A0,A4
404ENDPROC(csum_partial)
405
406;unsigned short
407;ip_compute_csum(unsigned char *buff, unsigned int len)
408;
409; A4: buff
410; B4: len
411; return checksum in A4
412
413ENTRY(ip_compute_csum)
414 MV .L1X B3,A9
415|| CALLP .S2 do_csum,B3
416 BNOP .S2X A9,3
417 NOT .S1 A4,A4
418 CLR .S1 A4,16,31,A4
419ENDPROC(ip_compute_csum)