Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
1da177e4 LT |
2 | #ifndef _INET_ECN_H_ |
3 | #define _INET_ECN_H_ | |
4 | ||
5 | #include <linux/ip.h> | |
2566a509 | 6 | #include <linux/skbuff.h> |
d7bf2ebe | 7 | #include <linux/if_vlan.h> |
14c85021 ACM |
8 | |
9 | #include <net/inet_sock.h> | |
1da177e4 | 10 | #include <net/dsfield.h> |
0780b414 | 11 | #include <net/checksum.h> |
1da177e4 LT |
12 | |
/*
 * Codepoints of the two-bit ECN field, which the helpers in this file
 * extract from a DS byte with INET_ECN_MASK.
 */
enum {
	INET_ECN_NOT_ECT	= 0x0,	/* 00: Not-ECT */
	INET_ECN_ECT_1		= 0x1,	/* 01: ECT(1) */
	INET_ECN_ECT_0		= 0x2,	/* 10: ECT(0) */
	INET_ECN_CE		= 0x3,	/* 11: CE */
	INET_ECN_MASK		= 0x3,	/* both ECN bits */
};
20 | ||
eccc1bb8 | 21 | extern int sysctl_tunnel_ecn_log; |
22 | ||
1da177e4 LT |
23 | static inline int INET_ECN_is_ce(__u8 dsfield) |
24 | { | |
25 | return (dsfield & INET_ECN_MASK) == INET_ECN_CE; | |
26 | } | |
27 | ||
28 | static inline int INET_ECN_is_not_ect(__u8 dsfield) | |
29 | { | |
30 | return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT; | |
31 | } | |
32 | ||
33 | static inline int INET_ECN_is_capable(__u8 dsfield) | |
34 | { | |
a02cec21 | 35 | return dsfield & INET_ECN_ECT_0; |
1da177e4 LT |
36 | } |
37 | ||
b5d9c9c2 ED |
38 | /* |
39 | * RFC 3168 9.1.1 | |
40 | * The full-functionality option for ECN encapsulation is to copy the | |
41 | * ECN codepoint of the inside header to the outside header on | |
42 | * encapsulation if the inside header is not-ECT or ECT, and to set the | |
43 | * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of | |
44 | * the inside header is CE. | |
45 | */ | |
1da177e4 LT |
46 | static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) |
47 | { | |
48 | outer &= ~INET_ECN_MASK; | |
49 | outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) : | |
50 | INET_ECN_ECT_0; | |
51 | return outer; | |
52 | } | |
53 | ||
ca067070 SG |
54 | static inline void INET_ECN_xmit(struct sock *sk) |
55 | { | |
56 | inet_sk(sk)->tos |= INET_ECN_ECT_0; | |
57 | if (inet6_sk(sk) != NULL) | |
58 | inet6_sk(sk)->tclass |= INET_ECN_ECT_0; | |
59 | } | |
60 | ||
61 | static inline void INET_ECN_dontxmit(struct sock *sk) | |
62 | { | |
63 | inet_sk(sk)->tos &= ~INET_ECN_MASK; | |
64 | if (inet6_sk(sk) != NULL) | |
65 | inet6_sk(sk)->tclass &= ~INET_ECN_MASK; | |
66 | } | |
1da177e4 LT |
67 | |
/*
 * Clear the two ECN bits in @label, a 32-bit value in network byte order.
 * The mask is INET_ECN_MASK shifted left by 20 and converted with htonl().
 * @label must be a modifiable lvalue; it is evaluated once.
 */
#define IP6_ECN_flow_init(label) do {		\
	(label) &= ~htonl(INET_ECN_MASK << 20);	\
} while (0)
71 | ||
/*
 * If the tclass of @sk's IPv6 state passes INET_ECN_is_capable(), OR the
 * ECT(0) codepoint (shifted left by 20, network byte order) into @label.
 * inet6_sk(sk) is dereferenced without a NULL check, so callers must only
 * pass sockets for which inet6_sk() is non-NULL.
 */
#define IP6_ECN_flow_xmit(sk, label) do {			\
	if (INET_ECN_is_capable(inet6_sk(sk)->tclass))		\
		(label) |= htonl(INET_ECN_ECT_0 << 20);		\
} while (0)
76 | ||
2566a509 | 77 | static inline int IP_ECN_set_ce(struct iphdr *iph) |
1da177e4 | 78 | { |
1da177e4 | 79 | u32 ecn = (iph->tos + 1) & INET_ECN_MASK; |
0780b414 | 80 | __be16 check_add; |
1da177e4 LT |
81 | |
82 | /* | |
83 | * After the last operation we have (in binary): | |
84 | * INET_ECN_NOT_ECT => 01 | |
85 | * INET_ECN_ECT_1 => 10 | |
86 | * INET_ECN_ECT_0 => 11 | |
87 | * INET_ECN_CE => 00 | |
88 | */ | |
89 | if (!(ecn & 2)) | |
2566a509 | 90 | return !ecn; |
1da177e4 LT |
91 | |
92 | /* | |
93 | * The following gives us: | |
94 | * INET_ECN_ECT_1 => check += htons(0xFFFD) | |
95 | * INET_ECN_ECT_0 => check += htons(0xFFFE) | |
96 | */ | |
0780b414 THJ |
97 | check_add = (__force __be16)((__force u16)htons(0xFFFB) + |
98 | (__force u16)htons(ecn)); | |
1da177e4 | 99 | |
0780b414 | 100 | iph->check = csum16_add(iph->check, check_add); |
1da177e4 | 101 | iph->tos |= INET_ECN_CE; |
2566a509 | 102 | return 1; |
1da177e4 LT |
103 | } |
104 | ||
b7237487 THJ |
105 | static inline int IP_ECN_set_ect1(struct iphdr *iph) |
106 | { | |
b7237487 THJ |
107 | if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) |
108 | return 0; | |
109 | ||
0780b414 | 110 | iph->check = csum16_add(iph->check, htons(0x1)); |
b7237487 THJ |
111 | iph->tos ^= INET_ECN_MASK; |
112 | return 1; | |
113 | } | |
114 | ||
1da177e4 LT |
115 | static inline void IP_ECN_clear(struct iphdr *iph) |
116 | { | |
117 | iph->tos &= ~INET_ECN_MASK; | |
118 | } | |
119 | ||
29bb43b4 | 120 | static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) |
1da177e4 | 121 | { |
29bb43b4 | 122 | dscp &= ~INET_ECN_MASK; |
1da177e4 LT |
123 | ipv4_change_dsfield(inner, INET_ECN_MASK, dscp); |
124 | } | |
125 | ||
126 | struct ipv6hdr; | |
127 | ||
34ae6a1a ED |
128 | /* Note: |
129 | * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, | |
130 | * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE | |
131 | * In IPv6 case, no checksum compensates the change in IPv6 header, | |
132 | * so we have to update skb->csum. | |
133 | */ | |
134 | static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) | |
1da177e4 | 135 | { |
34ae6a1a ED |
136 | __be32 from, to; |
137 | ||
1da177e4 | 138 | if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) |
2566a509 | 139 | return 0; |
34ae6a1a ED |
140 | |
141 | from = *(__be32 *)iph; | |
142 | to = from | htonl(INET_ECN_CE << 20); | |
143 | *(__be32 *)iph = to; | |
144 | if (skb->ip_summed == CHECKSUM_COMPLETE) | |
c15c0ab1 JB |
145 | skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), |
146 | (__force __wsum)to); | |
2566a509 | 147 | return 1; |
1da177e4 LT |
148 | } |
149 | ||
b7237487 THJ |
150 | static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph) |
151 | { | |
152 | __be32 from, to; | |
153 | ||
154 | if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0) | |
155 | return 0; | |
156 | ||
157 | from = *(__be32 *)iph; | |
158 | to = from ^ htonl(INET_ECN_MASK << 20); | |
159 | *(__be32 *)iph = to; | |
160 | if (skb->ip_summed == CHECKSUM_COMPLETE) | |
161 | skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), | |
162 | (__force __wsum)to); | |
163 | return 1; | |
164 | } | |
165 | ||
29bb43b4 | 166 | static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) |
1da177e4 | 167 | { |
29bb43b4 | 168 | dscp &= ~INET_ECN_MASK; |
1da177e4 LT |
169 | ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); |
170 | } | |
171 | ||
2566a509 TG |
172 | static inline int INET_ECN_set_ce(struct sk_buff *skb) |
173 | { | |
d7bf2ebe | 174 | switch (skb_protocol(skb, true)) { |
f3a7c66b | 175 | case cpu_to_be16(ETH_P_IP): |
ced14f68 SH |
176 | if (skb_network_header(skb) + sizeof(struct iphdr) <= |
177 | skb_tail_pointer(skb)) | |
eddc9ec5 | 178 | return IP_ECN_set_ce(ip_hdr(skb)); |
2566a509 TG |
179 | break; |
180 | ||
f3a7c66b | 181 | case cpu_to_be16(ETH_P_IPV6): |
ced14f68 SH |
182 | if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= |
183 | skb_tail_pointer(skb)) | |
34ae6a1a | 184 | return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); |
2566a509 TG |
185 | break; |
186 | } | |
187 | ||
188 | return 0; | |
189 | } | |
190 | ||
b7237487 THJ |
191 | static inline int INET_ECN_set_ect1(struct sk_buff *skb) |
192 | { | |
d7bf2ebe | 193 | switch (skb_protocol(skb, true)) { |
b7237487 THJ |
194 | case cpu_to_be16(ETH_P_IP): |
195 | if (skb_network_header(skb) + sizeof(struct iphdr) <= | |
196 | skb_tail_pointer(skb)) | |
197 | return IP_ECN_set_ect1(ip_hdr(skb)); | |
198 | break; | |
199 | ||
200 | case cpu_to_be16(ETH_P_IPV6): | |
201 | if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= | |
202 | skb_tail_pointer(skb)) | |
203 | return IP6_ECN_set_ect1(skb, ipv6_hdr(skb)); | |
204 | break; | |
205 | } | |
206 | ||
207 | return 0; | |
208 | } | |
209 | ||
eccc1bb8 | 210 | /* |
d28071d1 | 211 | * RFC 6040 4.2 |
eccc1bb8 | 212 | * To decapsulate the inner header at the tunnel egress, a compliant |
213 | * tunnel egress MUST set the outgoing ECN field to the codepoint at the | |
214 | * intersection of the appropriate arriving inner header (row) and outer | |
215 | * header (column) in Figure 4 | |
216 | * | |
217 | * +---------+------------------------------------------------+ | |
218 | * |Arriving | Arriving Outer Header | | |
219 | * | Inner +---------+------------+------------+------------+ | |
220 | * | Header | Not-ECT | ECT(0) | ECT(1) | CE | | |
221 | * +---------+---------+------------+------------+------------+ | |
222 | * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)| | |
223 | * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE | | |
224 | * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE | | |
225 | * | CE | CE | CE | CE(!!!)| CE | | |
226 | * +---------+---------+------------+------------+------------+ | |
227 | * | |
228 | * Figure 4: New IP in IP Decapsulation Behaviour | |
229 | * | |
230 | * returns 0 on success | |
231 | * 1 if something is broken and should be logged (!!! above) | |
232 | * 2 if packet should be dropped | |
233 | */ | |
28e45033 | 234 | static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) |
eccc1bb8 | 235 | { |
236 | if (INET_ECN_is_not_ect(inner)) { | |
237 | switch (outer & INET_ECN_MASK) { | |
238 | case INET_ECN_NOT_ECT: | |
239 | return 0; | |
240 | case INET_ECN_ECT_0: | |
241 | case INET_ECN_ECT_1: | |
242 | return 1; | |
243 | case INET_ECN_CE: | |
244 | return 2; | |
245 | } | |
246 | } | |
247 | ||
28e45033 IS |
248 | *set_ce = INET_ECN_is_ce(outer); |
249 | return 0; | |
250 | } | |
251 | ||
252 | static inline int INET_ECN_decapsulate(struct sk_buff *skb, | |
253 | __u8 outer, __u8 inner) | |
254 | { | |
255 | bool set_ce = false; | |
256 | int rc; | |
257 | ||
258 | rc = __INET_ECN_decapsulate(outer, inner, &set_ce); | |
b7237487 THJ |
259 | if (!rc) { |
260 | if (set_ce) | |
261 | INET_ECN_set_ce(skb); | |
262 | else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1) | |
263 | INET_ECN_set_ect1(skb); | |
264 | } | |
eccc1bb8 | 265 | |
28e45033 | 266 | return rc; |
eccc1bb8 | 267 | } |
268 | ||
269 | static inline int IP_ECN_decapsulate(const struct iphdr *oiph, | |
270 | struct sk_buff *skb) | |
271 | { | |
272 | __u8 inner; | |
273 | ||
d7bf2ebe THJ |
274 | switch (skb_protocol(skb, true)) { |
275 | case htons(ETH_P_IP): | |
eccc1bb8 | 276 | inner = ip_hdr(skb)->tos; |
d7bf2ebe THJ |
277 | break; |
278 | case htons(ETH_P_IPV6): | |
eccc1bb8 | 279 | inner = ipv6_get_dsfield(ipv6_hdr(skb)); |
d7bf2ebe THJ |
280 | break; |
281 | default: | |
eccc1bb8 | 282 | return 0; |
d7bf2ebe | 283 | } |
eccc1bb8 | 284 | |
285 | return INET_ECN_decapsulate(skb, oiph->tos, inner); | |
286 | } | |
287 | ||
288 | static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, | |
289 | struct sk_buff *skb) | |
290 | { | |
291 | __u8 inner; | |
292 | ||
d7bf2ebe THJ |
293 | switch (skb_protocol(skb, true)) { |
294 | case htons(ETH_P_IP): | |
eccc1bb8 | 295 | inner = ip_hdr(skb)->tos; |
d7bf2ebe THJ |
296 | break; |
297 | case htons(ETH_P_IPV6): | |
eccc1bb8 | 298 | inner = ipv6_get_dsfield(ipv6_hdr(skb)); |
d7bf2ebe THJ |
299 | break; |
300 | default: | |
eccc1bb8 | 301 | return 0; |
d7bf2ebe | 302 | } |
eccc1bb8 | 303 | |
304 | return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); | |
305 | } | |
1da177e4 | 306 | #endif |