Commit | Line | Data |
---|---|---|
08dbd0f8 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
075a46a0 RK |
2 | /* |
3 | * Checksum functions for Hexagon | |
4 | * | |
e1858b2a | 5 | * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. |
075a46a0 RK |
6 | */ |
7 | ||
8 | /* This was derived from arch/alpha/lib/checksum.c */ | |
9 | ||
10 | ||
11 | #include <linux/module.h> | |
12 | #include <linux/string.h> | |
13 | ||
14 | #include <asm/byteorder.h> | |
15 | #include <net/checksum.h> | |
16 | #include <linux/uaccess.h> | |
17 | #include <asm/intrinsics.h> | |
18 | ||
19 | ||
/*
 * Vector value operations.
 *
 * Each helper builds a 64-bit constant out of four 16-bit lanes.  The
 * single-lane macros place one 16-bit pattern, scaled by the 0/1 flag x,
 * at bit offset y; the VR_* macros combine four flags (a = bits 48..63,
 * d = bits 0..15) into a full 64-bit value.
 *
 * Arguments are fully parenthesized so that expression arguments
 * (e.g. "a + b" or a conditional shift count) expand as intended.
 */
#define SIGN(x, y)	((0x8000ULL * (x)) << (y))
#define CARRY(x, y)	((0x0002ULL * (x)) << (y))
#define SELECT(x, y)	((0x0001ULL * (x)) << (y))

#define VR_NEGATE(a, b, c, d)	(SIGN((a), 48) + SIGN((b), 32) + \
				 SIGN((c), 16) + SIGN((d), 0))
#define VR_CARRY(a, b, c, d)	(CARRY((a), 48) + CARRY((b), 32) + \
				 CARRY((c), 16) + CARRY((d), 0))
#define VR_SELECT(a, b, c, d)	(SELECT((a), 48) + SELECT((b), 32) + \
				 SELECT((c), 16) + SELECT((d), 0))
31 | ||
32 | ||
33 | /* optimized HEXAGON V3 intrinsic version */ | |
34 | static inline unsigned short from64to16(u64 x) | |
35 | { | |
36 | u64 sum; | |
37 | ||
38 | sum = HEXAGON_P_vrmpyh_PP(x^VR_NEGATE(1, 1, 1, 1), | |
39 | VR_SELECT(1, 1, 1, 1)); | |
40 | sum += VR_CARRY(0, 0, 1, 0); | |
41 | sum = HEXAGON_P_vrmpyh_PP(sum, VR_SELECT(0, 0, 1, 1)); | |
42 | ||
43 | return 0xFFFF & sum; | |
44 | } | |
45 | ||
46 | /* | |
47 | * computes the checksum of the TCP/UDP pseudo-header | |
48 | * returns a 16-bit checksum, already complemented. | |
49 | */ | |
01cfbad7 AD |
50 | __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, |
51 | __u32 len, __u8 proto, __wsum sum) | |
075a46a0 RK |
52 | { |
53 | return (__force __sum16)~from64to16( | |
54 | (__force u64)saddr + (__force u64)daddr + | |
55 | (__force u64)sum + ((len + proto) << 8)); | |
56 | } | |
57 | ||
01cfbad7 AD |
58 | __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, |
59 | __u32 len, __u8 proto, __wsum sum) | |
075a46a0 RK |
60 | { |
61 | u64 result; | |
62 | ||
63 | result = (__force u64)saddr + (__force u64)daddr + | |
64 | (__force u64)sum + ((len + proto) << 8); | |
65 | ||
66 | /* Fold down to 32-bits so we don't lose in the typedef-less | |
67 | network stack. */ | |
68 | /* 64 to 33 */ | |
69 | result = (result & 0xffffffffUL) + (result >> 32); | |
70 | /* 33 to 32 */ | |
71 | result = (result & 0xffffffffUL) + (result >> 32); | |
72 | return (__force __wsum)result; | |
73 | } | |
74 | EXPORT_SYMBOL(csum_tcpudp_nofold); | |
75 | ||
76 | /* | |
77 | * Do a 64-bit checksum on an arbitrary memory area. | |
78 | * | |
79 | * This isn't a great routine, but it's not _horrible_ either. The | |
80 | * inner loop could be unrolled a bit further, and there are better | |
81 | * ways to do the carry, but this is reasonable. | |
82 | */ | |
83 | ||
84 | /* optimized HEXAGON intrinsic version, with over read fixed */ | |
85 | unsigned int do_csum(const void *voidptr, int len) | |
86 | { | |
87 | u64 sum0, sum1, x0, x1, *ptr8_o, *ptr8_e, *ptr8; | |
88 | int i, start, mid, end, mask; | |
89 | const char *ptr = voidptr; | |
90 | unsigned short *ptr2; | |
91 | unsigned int *ptr4; | |
92 | ||
93 | if (len <= 0) | |
94 | return 0; | |
95 | ||
96 | start = 0xF & (16-(((int) ptr) & 0xF)) ; | |
97 | mask = 0x7fffffffUL >> HEXAGON_R_cl0_R(len); | |
98 | start = start & mask ; | |
99 | ||
100 | mid = len - start; | |
101 | end = mid & 0xF; | |
102 | mid = mid>>4; | |
103 | sum0 = mid << 18; | |
104 | sum1 = 0; | |
105 | ||
106 | if (start & 1) | |
107 | sum0 += (u64) (ptr[0] << 8); | |
108 | ptr2 = (unsigned short *) &ptr[start & 1]; | |
109 | if (start & 2) | |
110 | sum1 += (u64) ptr2[0]; | |
111 | ptr4 = (unsigned int *) &ptr[start & 3]; | |
112 | if (start & 4) { | |
113 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, | |
114 | VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]), | |
115 | VR_SELECT(0, 0, 1, 1)); | |
116 | sum0 += VR_SELECT(0, 0, 1, 0); | |
117 | } | |
118 | ptr8 = (u64 *) &ptr[start & 7]; | |
119 | if (start & 8) { | |
120 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, | |
121 | VR_NEGATE(1, 1, 1, 1)^(ptr8[0]), | |
122 | VR_SELECT(1, 1, 1, 1)); | |
123 | sum1 += VR_CARRY(0, 0, 1, 0); | |
124 | } | |
125 | ptr8_o = (u64 *) (ptr + start); | |
126 | ptr8_e = (u64 *) (ptr + start + 8); | |
127 | ||
128 | if (mid) { | |
129 | x0 = *ptr8_e; ptr8_e += 2; | |
130 | x1 = *ptr8_o; ptr8_o += 2; | |
131 | if (mid > 1) | |
132 | for (i = 0; i < mid-1; i++) { | |
133 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, | |
134 | x0^VR_NEGATE(1, 1, 1, 1), | |
135 | VR_SELECT(1, 1, 1, 1)); | |
136 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, | |
137 | x1^VR_NEGATE(1, 1, 1, 1), | |
138 | VR_SELECT(1, 1, 1, 1)); | |
139 | x0 = *ptr8_e; ptr8_e += 2; | |
140 | x1 = *ptr8_o; ptr8_o += 2; | |
141 | } | |
142 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, x0^VR_NEGATE(1, 1, 1, 1), | |
143 | VR_SELECT(1, 1, 1, 1)); | |
144 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, x1^VR_NEGATE(1, 1, 1, 1), | |
145 | VR_SELECT(1, 1, 1, 1)); | |
146 | } | |
147 | ||
148 | ptr4 = (unsigned int *) &ptr[start + (mid * 16) + (end & 8)]; | |
149 | if (end & 4) { | |
150 | sum1 = HEXAGON_P_vrmpyhacc_PP(sum1, | |
151 | VR_NEGATE(0, 0, 1, 1)^((u64)ptr4[0]), | |
152 | VR_SELECT(0, 0, 1, 1)); | |
153 | sum1 += VR_SELECT(0, 0, 1, 0); | |
154 | } | |
155 | ptr2 = (unsigned short *) &ptr[start + (mid * 16) + (end & 12)]; | |
156 | if (end & 2) | |
157 | sum0 += (u64) ptr2[0]; | |
158 | ||
159 | if (end & 1) | |
160 | sum1 += (u64) ptr[start + (mid * 16) + (end & 14)]; | |
161 | ||
162 | ptr8 = (u64 *) &ptr[start + (mid * 16)]; | |
163 | if (end & 8) { | |
164 | sum0 = HEXAGON_P_vrmpyhacc_PP(sum0, | |
165 | VR_NEGATE(1, 1, 1, 1)^(ptr8[0]), | |
166 | VR_SELECT(1, 1, 1, 1)); | |
167 | sum0 += VR_CARRY(0, 0, 1, 0); | |
168 | } | |
169 | sum0 = HEXAGON_P_vrmpyh_PP((sum0+sum1)^VR_NEGATE(0, 0, 0, 1), | |
170 | VR_SELECT(0, 0, 1, 1)); | |
171 | sum0 += VR_NEGATE(0, 0, 0, 1); | |
172 | sum0 = HEXAGON_P_vrmpyh_PP(sum0, VR_SELECT(0, 0, 1, 1)); | |
173 | ||
174 | if (start & 1) | |
175 | sum0 = (sum0 << 8) | (0xFF & (sum0 >> 8)); | |
176 | ||
177 | return 0xFFFF & sum0; | |
178 | } |