Commit | Line | Data |
---|---|---|
cf308a1f HS |
1 | /* |
2 | * xt_HMARK - Netfilter module to set mark by means of hashing | |
3 | * | |
4 | * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com> | |
5 | * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org> | |
6 | * | |
7 | * This program is free software; you can redistribute it and/or modify it | |
8 | * under the terms of the GNU General Public License version 2 as published by | |
9 | * the Free Software Foundation. | |
10 | */ | |
11 | ||
c08e5e1e FW |
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
13 | ||
cf308a1f HS |
14 | #include <linux/module.h> |
15 | #include <linux/skbuff.h> | |
16 | #include <linux/icmp.h> | |
17 | ||
18 | #include <linux/netfilter/x_tables.h> | |
19 | #include <linux/netfilter/xt_HMARK.h> | |
20 | ||
21 | #include <net/ip.h> | |
22 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) | |
23 | #include <net/netfilter/nf_conntrack.h> | |
24 | #endif | |
25 | #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) | |
26 | #include <net/ipv6.h> | |
27 | #include <linux/netfilter_ipv6/ip6_tables.h> | |
28 | #endif | |
29 | ||
30 | MODULE_LICENSE("GPL"); | |
31 | MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>"); | |
32 | MODULE_DESCRIPTION("Xtables: packet marking using hash calculation"); | |
33 | MODULE_ALIAS("ipt_HMARK"); | |
34 | MODULE_ALIAS("ip6t_HMARK"); | |
35 | ||
36 | struct hmark_tuple { | |
d1992b16 HS |
37 | __be32 src; |
38 | __be32 dst; | |
cf308a1f | 39 | union hmark_ports uports; |
d1992b16 | 40 | u8 proto; |
cf308a1f HS |
41 | }; |
42 | ||
/*
 * Reduce a 128-bit address to one 32-bit word.
 *
 * Each of the four 32-bit words of @addr32 is ANDed with the matching word
 * of @mask and the four results are XORed together. Both arrays must hold
 * at least four elements.
 */
static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
{
	__be32 folded = 0;
	int i;

	for (i = 0; i < 4; i++)
		folded ^= addr32[i] & mask[i];

	return folded;
}
50 | ||
d1992b16 HS |
/*
 * Apply @mask to an address of family @l3num and return a 32-bit result.
 *
 * AF_INET:  only the first word of @addr32 and @mask is used.
 * AF_INET6: all four words are masked and folded by hmark_addr6_mask().
 * Any other family yields 0.
 */
static inline __be32
hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
{
	if (l3num == AF_INET)
		return addr32[0] & mask[0];

	if (l3num == AF_INET6)
		return hmark_addr6_mask(addr32, mask);

	return 0;
}
62 | ||
d1992b16 HS |
63 | static inline void hmark_swap_ports(union hmark_ports *uports, |
64 | const struct xt_hmark_info *info) | |
65 | { | |
66 | union hmark_ports hp; | |
67 | u16 src, dst; | |
68 | ||
69 | hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32; | |
70 | src = ntohs(hp.b16.src); | |
71 | dst = ntohs(hp.b16.dst); | |
72 | ||
73 | if (dst > src) | |
74 | uports->v32 = (dst << 16) | src; | |
75 | else | |
76 | uports->v32 = (src << 16) | dst; | |
77 | } | |
78 | ||
cf308a1f HS |
79 | static int |
80 | hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, | |
81 | const struct xt_hmark_info *info) | |
82 | { | |
83 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) | |
84 | enum ip_conntrack_info ctinfo; | |
85 | struct nf_conn *ct = nf_ct_get(skb, &ctinfo); | |
86 | struct nf_conntrack_tuple *otuple; | |
87 | struct nf_conntrack_tuple *rtuple; | |
88 | ||
ab8bc7ed | 89 | if (ct == NULL) |
cf308a1f HS |
90 | return -1; |
91 | ||
92 | otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | |
93 | rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
94 | ||
d1992b16 HS |
95 | t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6, |
96 | info->src_mask.ip6); | |
97 | t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6, | |
98 | info->dst_mask.ip6); | |
cf308a1f HS |
99 | |
100 | if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) | |
101 | return 0; | |
102 | ||
103 | t->proto = nf_ct_protonum(ct); | |
104 | if (t->proto != IPPROTO_ICMP) { | |
d1992b16 HS |
105 | t->uports.b16.src = otuple->src.u.all; |
106 | t->uports.b16.dst = rtuple->src.u.all; | |
107 | hmark_swap_ports(&t->uports, info); | |
cf308a1f HS |
108 | } |
109 | ||
110 | return 0; | |
111 | #else | |
112 | return -1; | |
113 | #endif | |
114 | } | |
115 | ||
d1992b16 HS |
116 | /* This hash function is endian independent, to ensure consistent hashing if |
117 | * the cluster is composed of big and little endian systems. */ | |
cf308a1f HS |
118 | static inline u32 |
119 | hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) | |
120 | { | |
121 | u32 hash; | |
d1992b16 HS |
122 | u32 src = ntohl(t->src); |
123 | u32 dst = ntohl(t->dst); | |
cf308a1f | 124 | |
d1992b16 HS |
125 | if (dst < src) |
126 | swap(src, dst); | |
cf308a1f | 127 | |
d1992b16 | 128 | hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd); |
cf308a1f HS |
129 | hash = hash ^ (t->proto & info->proto_mask); |
130 | ||
8fc54f68 | 131 | return reciprocal_scale(hash, info->hmodulus) + info->hoffset; |
cf308a1f HS |
132 | } |
133 | ||
134 | static void | |
135 | hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff, | |
136 | struct hmark_tuple *t, const struct xt_hmark_info *info) | |
137 | { | |
138 | int protoff; | |
139 | ||
140 | protoff = proto_ports_offset(t->proto); | |
141 | if (protoff < 0) | |
142 | return; | |
143 | ||
144 | nhoff += protoff; | |
145 | if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0) | |
146 | return; | |
147 | ||
d1992b16 | 148 | hmark_swap_ports(&t->uports, info); |
cf308a1f HS |
149 | } |
150 | ||
151 | #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) | |
152 | static int get_inner6_hdr(const struct sk_buff *skb, int *offset) | |
153 | { | |
154 | struct icmp6hdr *icmp6h, _ih6; | |
155 | ||
156 | icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6); | |
157 | if (icmp6h == NULL) | |
158 | return 0; | |
159 | ||
160 | if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) { | |
161 | *offset += sizeof(struct icmp6hdr); | |
162 | return 1; | |
163 | } | |
164 | return 0; | |
165 | } | |
166 | ||
167 | static int | |
168 | hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, | |
169 | const struct xt_hmark_info *info) | |
170 | { | |
171 | struct ipv6hdr *ip6, _ip6; | |
f8f62675 | 172 | int flag = IP6_FH_F_AUTH; |
cf308a1f HS |
173 | unsigned int nhoff = 0; |
174 | u16 fragoff = 0; | |
175 | int nexthdr; | |
176 | ||
177 | ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb)); | |
178 | nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); | |
179 | if (nexthdr < 0) | |
180 | return 0; | |
181 | /* No need to check for icmp errors on fragments */ | |
f8f62675 | 182 | if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6)) |
cf308a1f HS |
183 | goto noicmp; |
184 | /* Use inner header in case of ICMP errors */ | |
185 | if (get_inner6_hdr(skb, &nhoff)) { | |
186 | ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6); | |
187 | if (ip6 == NULL) | |
188 | return -1; | |
189 | /* If AH present, use SPI like in ESP. */ | |
f8f62675 | 190 | flag = IP6_FH_F_AUTH; |
cf308a1f HS |
191 | nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag); |
192 | if (nexthdr < 0) | |
193 | return -1; | |
194 | } | |
195 | noicmp: | |
d1992b16 HS |
196 | t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6); |
197 | t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6); | |
cf308a1f HS |
198 | |
199 | if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) | |
200 | return 0; | |
201 | ||
202 | t->proto = nexthdr; | |
203 | if (t->proto == IPPROTO_ICMPV6) | |
204 | return 0; | |
205 | ||
f8f62675 | 206 | if (flag & IP6_FH_F_FRAG) |
cf308a1f HS |
207 | return 0; |
208 | ||
209 | hmark_set_tuple_ports(skb, nhoff, t, info); | |
210 | return 0; | |
211 | } | |
212 | ||
213 | static unsigned int | |
214 | hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par) | |
215 | { | |
216 | const struct xt_hmark_info *info = par->targinfo; | |
217 | struct hmark_tuple t; | |
218 | ||
219 | memset(&t, 0, sizeof(struct hmark_tuple)); | |
220 | ||
221 | if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { | |
222 | if (hmark_ct_set_htuple(skb, &t, info) < 0) | |
223 | return XT_CONTINUE; | |
224 | } else { | |
225 | if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0) | |
226 | return XT_CONTINUE; | |
227 | } | |
228 | ||
229 | skb->mark = hmark_hash(&t, info); | |
230 | return XT_CONTINUE; | |
231 | } | |
232 | #endif | |
233 | ||
234 | static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff) | |
235 | { | |
236 | const struct icmphdr *icmph; | |
237 | struct icmphdr _ih; | |
238 | ||
239 | /* Not enough header? */ | |
240 | icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih); | |
58618115 | 241 | if (icmph == NULL || icmph->type > NR_ICMP_TYPES) |
cf308a1f HS |
242 | return 0; |
243 | ||
244 | /* Error message? */ | |
245 | if (icmph->type != ICMP_DEST_UNREACH && | |
246 | icmph->type != ICMP_SOURCE_QUENCH && | |
247 | icmph->type != ICMP_TIME_EXCEEDED && | |
248 | icmph->type != ICMP_PARAMETERPROB && | |
249 | icmph->type != ICMP_REDIRECT) | |
250 | return 0; | |
251 | ||
252 | *nhoff += iphsz + sizeof(_ih); | |
253 | return 1; | |
254 | } | |
255 | ||
256 | static int | |
257 | hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, | |
258 | const struct xt_hmark_info *info) | |
259 | { | |
260 | struct iphdr *ip, _ip; | |
261 | int nhoff = skb_network_offset(skb); | |
262 | ||
263 | ip = (struct iphdr *) (skb->data + nhoff); | |
264 | if (ip->protocol == IPPROTO_ICMP) { | |
265 | /* Use inner header in case of ICMP errors */ | |
266 | if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) { | |
267 | ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip); | |
268 | if (ip == NULL) | |
269 | return -1; | |
270 | } | |
271 | } | |
272 | ||
d1992b16 HS |
273 | t->src = ip->saddr & info->src_mask.ip; |
274 | t->dst = ip->daddr & info->dst_mask.ip; | |
cf308a1f HS |
275 | |
276 | if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) | |
277 | return 0; | |
278 | ||
279 | t->proto = ip->protocol; | |
280 | ||
281 | /* ICMP has no ports, skip */ | |
282 | if (t->proto == IPPROTO_ICMP) | |
283 | return 0; | |
284 | ||
285 | /* follow-up fragments don't contain ports, skip all fragments */ | |
286 | if (ip->frag_off & htons(IP_MF | IP_OFFSET)) | |
287 | return 0; | |
288 | ||
289 | hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info); | |
290 | ||
291 | return 0; | |
292 | } | |
293 | ||
294 | static unsigned int | |
295 | hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par) | |
296 | { | |
297 | const struct xt_hmark_info *info = par->targinfo; | |
298 | struct hmark_tuple t; | |
299 | ||
300 | memset(&t, 0, sizeof(struct hmark_tuple)); | |
301 | ||
302 | if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) { | |
303 | if (hmark_ct_set_htuple(skb, &t, info) < 0) | |
304 | return XT_CONTINUE; | |
305 | } else { | |
306 | if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0) | |
307 | return XT_CONTINUE; | |
308 | } | |
309 | ||
310 | skb->mark = hmark_hash(&t, info); | |
311 | return XT_CONTINUE; | |
312 | } | |
313 | ||
314 | static int hmark_tg_check(const struct xt_tgchk_param *par) | |
315 | { | |
316 | const struct xt_hmark_info *info = par->targinfo; | |
c08e5e1e | 317 | const char *errmsg = "proto mask must be zero with L3 mode"; |
cf308a1f | 318 | |
0cc9501f | 319 | if (!info->hmodulus) |
cf308a1f | 320 | return -EINVAL; |
0cc9501f | 321 | |
cf308a1f | 322 | if (info->proto_mask && |
c08e5e1e FW |
323 | (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) |
324 | goto err; | |
325 | ||
cf308a1f HS |
326 | if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && |
327 | (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | | |
0cc9501f | 328 | XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) |
cf308a1f | 329 | return -EINVAL; |
0cc9501f | 330 | |
cf308a1f HS |
331 | if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && |
332 | (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | | |
333 | XT_HMARK_FLAG(XT_HMARK_DPORT)))) { | |
c08e5e1e FW |
334 | errmsg = "spi-set and port-set can't be combined"; |
335 | goto err; | |
cf308a1f HS |
336 | } |
337 | return 0; | |
c08e5e1e FW |
338 | err: |
339 | pr_info_ratelimited("%s\n", errmsg); | |
340 | return -EINVAL; | |
cf308a1f HS |
341 | } |
342 | ||
343 | static struct xt_target hmark_tg_reg[] __read_mostly = { | |
344 | { | |
345 | .name = "HMARK", | |
346 | .family = NFPROTO_IPV4, | |
347 | .target = hmark_tg_v4, | |
348 | .targetsize = sizeof(struct xt_hmark_info), | |
349 | .checkentry = hmark_tg_check, | |
350 | .me = THIS_MODULE, | |
351 | }, | |
352 | #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) | |
353 | { | |
354 | .name = "HMARK", | |
355 | .family = NFPROTO_IPV6, | |
356 | .target = hmark_tg_v6, | |
357 | .targetsize = sizeof(struct xt_hmark_info), | |
358 | .checkentry = hmark_tg_check, | |
359 | .me = THIS_MODULE, | |
360 | }, | |
361 | #endif | |
362 | }; | |
363 | ||
364 | static int __init hmark_tg_init(void) | |
365 | { | |
366 | return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); | |
367 | } | |
368 | ||
369 | static void __exit hmark_tg_exit(void) | |
370 | { | |
371 | xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg)); | |
372 | } | |
373 | ||
/* Hook the init and exit routines into the module loader. */
module_init(hmark_tg_init);
module_exit(hmark_tg_exit);