Commit | Line | Data |
---|---|---|
09831ca7 AJ |
1 | ; |
2 | ; linux/arch/c6x/lib/csum_64plus.s | |
3 | ; | |
4 | ; Port on Texas Instruments TMS320C6x architecture | |
5 | ; | |
6 | ; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated | |
7 | ; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) | |
8 | ; | |
9 | ; This program is free software; you can redistribute it and/or modify | |
10 | ; it under the terms of the GNU General Public License version 2 as | |
11 | ; published by the Free Software Foundation. | |
12 | ; | |
13 | #include <linux/linkage.h> | |
14 | ||
15 | ; | |
16 | ;unsigned int csum_partial_copy(const char *src, char * dst, | |
17 | ; int len, int sum) | |
18 | ; | |
19 | ; A4: src | |
20 | ; B4: dst | |
21 | ; A6: len | |
22 | ; B6: sum | |
23 | ; return csum in A4 | |
24 | ; | |
25 | ||
26 | .text | |
27 | ENTRY(csum_partial_copy) ; copy len bytes from src to dst; return the folded 16-bit sum of the copied data plus the given sum | |
28 | MVC .S2 ILC,B30 ; save ILC, restored just before returning | |
29 | ||
30 | MV .D1X B6,A31 ; given csum | |
31 | ZERO .D1 A9 ; csum (a side) | |
32 | || ZERO .D2 B9 ; csum (b side) | |
33 | || SHRU .S2X A6,2,B5 ; len / 4 | |
34 | ||
35 | ;; Check alignment and size | |
36 | AND .S1 3,A4,A1 ; src & 3 | |
37 | || AND .S2 3,B4,B0 ; dst & 3 | |
38 | OR .L2X B0,A1,B0 ; non aligned condition | |
39 | || MVC .S2 B5,ILC ; loop count = number of whole words | |
40 | || MVK .D2 1,B2 ; multiplier 1 used by the MPYU in the aligned loop | |
41 | || MV .D1X B5,A1 ; words condition | |
42 | [!A1] B .S1 L8 ; no whole word to copy: go to the tail handling at L8 | |
43 | [B0] BNOP .S1 L6,5 ; src or dst not word aligned: use the LDNW/STNW loop at L6 | |
44 | ||
45 | SPLOOP 1 ; software pipelined loop, iteration interval 1, count taken from ILC | |
46 | ||
47 | ;; Main loop for aligned words | |
48 | LDW .D1T1 *A4++,A7 ; load one aligned word from src | |
49 | NOP 4 ; wait for the load result | |
50 | MV .S2X A7,B7 ; copy the word to the B side for the store | |
51 | || EXTU .S1 A7,0,16,A16 ; A16 = upper 16 bits of the word | |
52 | STW .D2T2 B7,*B4++ ; store the word to dst | |
53 | || MPYU .M2 B7,B2,B8 ; B8 = lower 16 bits of the word (times B2 = 1) | |
54 | || ADD .L1 A16,A9,A9 ; accumulate upper halves on the A side | |
55 | NOP | |
56 | SPKERNEL 8,0 | |
57 | || ADD .L2 B8,B9,B9 ; accumulate lower halves on the B side | |
58 | ||
59 | ZERO .D1 A1 ; A1 = 0 so that the non-aligned loop below is skipped | |
60 | || ADD .L1X A9,B9,A9 ; add csum from a and b sides | |
61 | ||
62 | L6: | |
63 | [!A1] BNOP .S1 L8,5 ; aligned loop already copied the words: skip to the tail | |
64 | ||
65 | ;; Main loop for non-aligned words | |
66 | SPLOOP 2 ; software pipelined loop, iteration interval 2, count taken from ILC | |
67 | || MVK .L1 1,A2 ; multiplier 1 used by the MPYU below | |
68 | ||
69 | LDNW .D1T1 *A4++,A7 ; load one non-aligned word from src | |
70 | NOP 3 | |
71 | ||
72 | NOP | |
73 | MV .S2X A7,B7 ; copy the word to the B side for the store | |
74 | || EXTU .S1 A7,0,16,A16 ; A16 = upper 16 bits of the word | |
75 | || MPYU .M1 A7,A2,A8 ; A8 = lower 16 bits of the word (times A2 = 1) | |
76 | ||
77 | ADD .L1 A16,A9,A9 ; accumulate the upper half | |
78 | SPKERNEL 6,0 | |
79 | || STNW .D2T2 B7,*B4++ ; store the word to dst (non-aligned store) | |
80 | || ADD .L1 A8,A9,A9 ; accumulate the lower half | |
81 | ||
82 | L8: AND .S2X 2,A6,B5 ; len & 2: is there a trailing half-word? | |
83 | CMPGT .L2 B5,0,B0 | |
84 | [!B0] BNOP .S1 L82,4 ; no half-word: go to the last byte check | |
85 | ||
86 | ;; Handle the trailing half-word (copied as two single bytes) | |
87 | ZERO .L1 A7 | |
88 | || ZERO .D1 A8 | |
89 | ||
90 | #ifdef CONFIG_CPU_BIG_ENDIAN | |
91 | ||
92 | LDBU .D1T1 *A4++,A7 ; first byte | |
93 | LDBU .D1T1 *A4++,A8 ; second byte | |
94 | NOP 3 | |
95 | SHL .S1 A7,8,A0 ; big endian: first byte is the high byte of the half-word | |
96 | ADD .S1 A8,A9,A9 ; add the low byte | |
97 | STB .D2T1 A7,*B4++ ; copy first byte to dst | |
98 | || ADD .S1 A0,A9,A9 ; add the high byte | |
99 | STB .D2T1 A8,*B4++ ; copy second byte to dst | |
100 | ||
101 | #else | |
102 | ||
103 | LDBU .D1T1 *A4++,A7 ; first byte | |
104 | LDBU .D1T1 *A4++,A8 ; second byte | |
105 | NOP 3 | |
106 | ADD .S1 A7,A9,A9 ; little endian: first byte is the low byte of the half-word | |
107 | SHL .S1 A8,8,A0 ; second byte is the high byte | |
108 | ||
109 | STB .D2T1 A7,*B4++ ; copy first byte to dst | |
110 | || ADD .S1 A0,A9,A9 ; add the high byte | |
111 | STB .D2T1 A8,*B4++ ; copy second byte to dst | |
112 | ||
113 | #endif | |
114 | ||
115 | ;; Handle the last byte, if any | |
116 | L82: AND .S2X 1,A6,B0 ; len & 1 | |
117 | [!B0] BNOP .S1 L9,5 ; even length: nothing left to copy | |
118 | ||
119 | || ZERO .L1 A7 | |
120 | ||
121 | L83: LDBU .D1T1 *A4++,A7 ; load the last byte | |
122 | NOP 4 | |
123 | ||
124 | MV .L2X A7,B7 ; copy the byte to the B side for the store | |
125 | ||
126 | #ifdef CONFIG_CPU_BIG_ENDIAN | |
127 | ||
128 | STB .D2T2 B7,*B4++ ; copy the byte to dst | |
129 | || SHL .S1 A7,8,A7 ; big endian: the byte counts as the high byte of a half-word | |
130 | ADD .S1 A7,A9,A9 | |
131 | ||
132 | #else | |
133 | ||
134 | STB .D2T2 B7,*B4++ ; copy the byte to dst | |
135 | || ADD .S1 A7,A9,A9 ; little endian: the byte counts as the low byte | |
136 | ||
137 | #endif | |
138 | ||
139 | ;; Fold the csum | |
140 | L9: SHRU .S2X A9,16,B0 ; anything above bit 15? | |
141 | [!B0] BNOP .S1 L10,5 ; already fits in 16 bits | |
142 | ||
143 | L91: SHRU .S2X A9,16,B4 ; B4 = upper 16 bits of the sum | |
144 | || EXTU .S1 A9,16,16,A3 ; A3 = lower 16 bits of the sum | |
145 | ADD .D1X A3,B4,A9 ; sum = lower + upper | |
146 | ||
147 | SHRU .S1 A9,16,A0 | |
148 | [A0] BNOP .S1 L91,5 ; repeat while the sum still exceeds 16 bits | |
149 | ||
150 | L10: ADD .D1 A31,A9,A9 ; add the given csum (no further folding after this) | |
151 | MV .D1 A9,A4 ; return value | |
152 | ||
153 | BNOP .S2 B3,4 ; return to caller | |
154 | MVC .S2 B30,ILC ; restore ILC in the remaining branch delay slot | |
155 | ENDPROC(csum_partial_copy) | |
156 | ||
157 | ; | |
158 | ;unsigned short | |
159 | ;ip_fast_csum(unsigned char *iph, unsigned int ihl) | |
160 | ;{ | |
161 | ; unsigned int checksum = 0; | |
162 | ; unsigned short *tosum = (unsigned short *) iph; | |
163 | ; int len; | |
164 | ; | |
165 | ; len = ihl*4; | |
166 | ; | |
167 | ; if (len <= 0) | |
168 | ; return 0; | |
169 | ; | |
170 | ; while(len) { | |
171 | ; len -= 2; | |
172 | ; checksum += *tosum++; | |
173 | ; } | |
174 | ; if (len & 1) | |
175 | ; checksum += *(unsigned char*) tosum; | |
176 | ; | |
177 | ; while(checksum >> 16) | |
178 | ; checksum = (checksum & 0xffff) + (checksum >> 16); | |
179 | ; | |
180 | ; return ~checksum; | |
181 | ;} | |
182 | ; | |
183 | ; A4: iph | |
184 | ; B4: ihl | |
185 | ; return checksum in A4 | |
186 | ; | |
187 | .text | |
188 | ||
189 | ENTRY(ip_fast_csum) ; sum ihl*4 bytes at iph as 16-bit half-words, fold to 16 bits and return the complement | |
190 | ZERO .D1 A5 ; checksum accumulator | |
191 | || MVC .S2 ILC,B30 ; save ILC, restored before returning | |
192 | SHL .S2 B4,2,B0 ; len = ihl * 4 | |
193 | CMPGT .L2 B0,0,B1 | |
194 | [!B1] BNOP .S1 L15,4 ; len <= 0: return 0 | |
195 | [!B1] ZERO .D1 A3 ; return value 0 for the early exit | |
196 | ||
197 | [!B0] B .S1 L12 | |
198 | SHRU .S2 B0,1,B0 ; number of half-words | |
199 | MVC .S2 B0,ILC ; loop count = number of half-words | |
200 | NOP 3 | |
201 | ||
202 | SPLOOP 1 ; software pipelined loop, iteration interval 1, count taken from ILC | |
203 | LDHU .D1T1 *A4++,A3 ; load next unsigned half-word | |
204 | NOP 3 | |
205 | NOP | |
206 | SPKERNEL 5,0 | |
207 | || ADD .L1 A3,A5,A5 ; checksum += half-word | |
208 | ||
209 | L12: SHRU .S1 A5,16,A0 ; anything above bit 15? | |
210 | [!A0] BNOP .S1 L14,5 ; already fits in 16 bits | |
211 | ||
212 | L13: SHRU .S2X A5,16,B4 ; B4 = upper 16 bits | |
213 | EXTU .S1 A5,16,16,A3 ; A3 = lower 16 bits | |
214 | ADD .D1X A3,B4,A5 ; checksum = lower + upper | |
215 | SHRU .S1 A5,16,A0 | |
216 | [A0] BNOP .S1 L13,5 ; repeat while the sum still exceeds 16 bits | |
217 | ||
218 | L14: NOT .D1 A5,A3 ; complement the folded sum | |
219 | EXTU .S1 A3,16,16,A3 ; keep only the low 16 bits | |
220 | ||
221 | L15: BNOP .S2 B3,3 ; return to caller; next two instructions fill the delay slots | |
222 | MVC .S2 B30,ILC ; restore ILC | |
223 | MV .D1 A3,A4 ; return value | |
224 | ENDPROC(ip_fast_csum) | |
225 | ||
226 | ; | |
227 | ;unsigned short | |
228 | ;do_csum(unsigned char *buff, unsigned int len) | |
229 | ;{ | |
230 | ; int odd, count; | |
231 | ; unsigned int result = 0; | |
232 | ; | |
233 | ; if (len <= 0) | |
234 | ; goto out; | |
235 | ; odd = 1 & (unsigned long) buff; | |
236 | ; if (odd) { | |
237 | ;#ifdef __LITTLE_ENDIAN | |
238 | ; result += (*buff << 8); | |
239 | ;#else | |
240 | ; result = *buff; | |
241 | ;#endif | |
242 | ; len--; | |
243 | ; buff++; | |
244 | ; } | |
245 | ; count = len >> 1; /* nr of 16-bit words.. */ | |
246 | ; if (count) { | |
247 | ; if (2 & (unsigned long) buff) { | |
248 | ; result += *(unsigned short *) buff; | |
249 | ; count--; | |
250 | ; len -= 2; | |
251 | ; buff += 2; | |
252 | ; } | |
253 | ; count >>= 1; /* nr of 32-bit words.. */ | |
254 | ; if (count) { | |
255 | ; unsigned int carry = 0; | |
256 | ; do { | |
257 | ; unsigned int w = *(unsigned int *) buff; | |
258 | ; count--; | |
259 | ; buff += 4; | |
260 | ; result += carry; | |
261 | ; result += w; | |
262 | ; carry = (w > result); | |
263 | ; } while (count); | |
264 | ; result += carry; | |
265 | ; result = (result & 0xffff) + (result >> 16); | |
266 | ; } | |
267 | ; if (len & 2) { | |
268 | ; result += *(unsigned short *) buff; | |
269 | ; buff += 2; | |
270 | ; } | |
271 | ; } | |
272 | ; if (len & 1) | |
273 | ;#ifdef __LITTLE_ENDIAN | |
274 | ; result += *buff; | |
275 | ;#else | |
276 | ; result += (*buff << 8); | |
277 | ;#endif | |
278 | ; result = (result & 0xffff) + (result >> 16); | |
279 | ; /* add up carry.. */ | |
280 | ; result = (result & 0xffff) + (result >> 16); | |
281 | ; if (odd) | |
282 | ; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); | |
283 | ;out: | |
284 | ; return result; | |
285 | ;} | |
286 | ; | |
287 | ; A4: buff | |
288 | ; B4: len | |
289 | ; return checksum in A4 | |
290 | ; | |
291 | ||
292 | ENTRY(do_csum) ; 16-bit folded sum of len bytes at buff, following the C pseudo-code above | |
293 | CMPGT .L2 B4,0,B0 | |
294 | [!B0] BNOP .S1 L26,3 ; len <= 0: exit; the next two packets still run in the delay slots | |
295 | EXTU .S1 A4,31,31,A0 ; A0 = buff & 1 (odd start address) | |
296 | ||
297 | MV .L1 A0,A3 ; keep the odd flag for the final byte swap | |
298 | || MV .S1X B3,A5 ; keep the return address in A5, B3 is reused below | |
299 | || MV .L2 B4,B3 ; B3 = working copy of len | |
300 | || ZERO .D1 A1 ; result = 0 | |
301 | ||
302 | #ifdef CONFIG_CPU_BIG_ENDIAN | |
303 | [A0] SUB .L2 B3,1,B3 ; odd: len-- | |
304 | || [A0] LDBU .D1T1 *A4++,A1 ; odd: result = *buff++ | |
305 | #else | |
306 | [!A0] BNOP .S1 L21,5 ; even start address: nothing to do | |
307 | || [A0] LDBU .D1T1 *A4++,A0 ; odd: load *buff++ | |
308 | SUB .L2 B3,1,B3 ; len-- | |
309 | || SHL .S1 A0,8,A1 ; result = byte << 8 | |
310 | L21: | |
311 | #endif | |
312 | SHR .S2 B3,1,B0 ; count = len >> 1 (number of half-words) | |
313 | [!B0] BNOP .S1 L24,3 ; no half-word: go to the last byte check | |
314 | MVK .L1 2,A0 | |
315 | AND .L1 A4,A0,A0 ; A0 = buff & 2 | |
316 | ||
317 | [!A0] BNOP .S1 L22,5 ; already word aligned | |
318 | || [A0] LDHU .D1T1 *A4++,A0 ; load the leading half-word | |
319 | SUB .L2 B0,1,B0 ; count-- | |
320 | || SUB .S2 B3,2,B3 ; len -= 2 | |
321 | || ADD .L1 A0,A1,A1 ; result += half-word | |
322 | L22: | |
323 | SHR .S2 B0,1,B0 ; count >>= 1 (number of 32-bit words) | |
324 | || ZERO .L1 A0 ; carry = 0 | |
325 | ||
326 | [!B0] BNOP .S1 L23,5 ; no whole word: skip the loop | |
327 | || [B0] MVC .S2 B0,ILC ; loop count = number of words | |
328 | ||
329 | SPLOOP 3 ; software pipelined loop, iteration interval 3, count taken from ILC | |
330 | SPMASK L1 ; NOTE(review): masks the .L1 MV below, presumably so it only seeds A2 once - confirm against the SPLOOP documentation | |
331 | || MV .L1 A1,A2 ; A2 = running result | |
332 | || LDW .D1T1 *A4++,A1 ; w = next word | |
333 | ||
334 | NOP 4 | |
335 | ADD .L1 A0,A1,A0 ; A0 = carry + w | |
336 | ADD .L1 A2,A0,A2 ; result += carry + w | |
337 | ||
338 | SPKERNEL 1,2 | |
339 | || CMPGTU .L1 A1,A2,A0 ; carry = (w > result) | |
340 | ||
341 | ADD .L1 A0,A2,A6 ; result + final carry | |
342 | EXTU .S1 A6,16,16,A7 ; lower 16 bits | |
343 | SHRU .S2X A6,16,B0 ; upper 16 bits | |
344 | NOP 1 | |
345 | ADD .L1X A7,B0,A1 ; result = lower + upper | |
346 | L23: | |
347 | MVK .L2 2,B0 | |
348 | AND .L2 B3,B0,B0 ; len & 2: trailing half-word? | |
349 | [B0] LDHU .D1T1 *A4++,A0 | |
350 | NOP 4 | |
351 | [B0] ADD .L1 A0,A1,A1 ; result += half-word | |
352 | L24: | |
353 | EXTU .S2 B3,31,31,B0 ; B0 = len & 1: trailing byte? | |
354 | #ifdef CONFIG_CPU_BIG_ENDIAN | |
355 | [!B0] BNOP .S1 L25,4 ; no trailing byte | |
356 | || [B0] LDBU .D1T1 *A4,A0 ; load the last byte | |
357 | SHL .S1 A0,8,A0 ; big endian: byte counts as the high byte | |
358 | ADD .L1 A0,A1,A1 ; result += byte << 8 | |
359 | L25: | |
360 | #else | |
361 | [B0] LDBU .D1T1 *A4,A0 ; load the last byte | |
362 | NOP 4 | |
363 | [B0] ADD .L1 A0,A1,A1 ; little endian: result += byte | |
364 | #endif | |
365 | EXTU .S1 A1,16,16,A0 ; lower 16 bits | |
366 | SHRU .S2X A1,16,B0 ; upper 16 bits | |
367 | NOP 1 | |
368 | ADD .L1X A0,B0,A0 ; first fold: lower + upper | |
369 | SHRU .S1 A0,16,A1 ; carry out of the first fold | |
370 | ADD .L1 A0,A1,A0 ; add the carry back in | |
371 | EXTU .S1 A0,16,16,A1 ; result = low 16 bits | |
372 | EXTU .S1 A1,16,24,A2 ; A2 = (result >> 8) & 0xff | |
373 | ||
374 | EXTU .S1 A1,24,16,A0 ; A0 = (result & 0xff) << 8 | |
375 | || MV .L2X A3,B0 ; B0 = odd flag saved at entry | |
376 | ||
377 | [B0] OR .L1 A0,A2,A1 ; odd start address: swap the two result bytes | |
378 | L26: | |
379 | NOP 1 | |
380 | BNOP .S2X A5,4 ; return through the address saved in A5 | |
381 | MV .L1 A1,A4 ; return value, set in the remaining branch delay slot | |
382 | ENDPROC(do_csum) | |
383 | ||
384 | ;__wsum csum_partial(const void *buff, int len, __wsum wsum) | |
385 | ;{ | |
386 | ; unsigned int sum = (__force unsigned int)wsum; | |
387 | ; unsigned int result = do_csum(buff, len); | |
388 | ; | |
389 | ; /* add in old sum, and carry.. */ | |
390 | ; result += sum; | |
391 | ; if (sum > result) | |
392 | ; result += 1; | |
393 | ; return (__force __wsum)result; | |
394 | ;} | |
395 | ; | |
396 | ENTRY(csum_partial) ; do_csum(buff, len) plus the value passed in A6, with end-around carry | |
397 | MV .L1X B3,A9 ; keep the return address in A9 across the call | |
398 | || CALLP .S2 do_csum,B3 ; call do_csum, buff in A4 and len in B4 are passed through | |
399 | || MV .S1 A6,A8 ; keep A6 (wsum per the prototype above) in A8, do_csum overwrites A6 | |
400 | BNOP .S2X A9,2 ; return; the next three instructions fill the delay slots | |
401 | ADD .L1 A8,A4,A1 ; result = do_csum result + sum | |
402 | CMPGTU .L1 A8,A1,A0 ; carry = (sum > result) | |
403 | ADD .L1 A1,A0,A4 ; return result + carry | |
404 | ENDPROC(csum_partial) | |
405 | ||
406 | ;unsigned short | |
407 | ;ip_compute_csum(unsigned char *buff, unsigned int len) | |
408 | ; | |
409 | ; A4: buff | |
410 | ; B4: len | |
411 | ; return checksum in A4 | |
412 | ||
413 | ENTRY(ip_compute_csum) ; complement of do_csum(buff, len), truncated to 16 bits | |
414 | MV .L1X B3,A9 ; keep the return address in A9 across the call | |
415 | || CALLP .S2 do_csum,B3 ; call do_csum, buff in A4 and len in B4 are passed through | |
416 | BNOP .S2X A9,3 ; return; the next two instructions fill the delay slots | |
417 | NOT .S1 A4,A4 ; complement the sum | |
418 | CLR .S1 A4,16,31,A4 ; clear bits 16..31, leaving a 16-bit result | |
419 | ENDPROC(ip_compute_csum) | |