e1000e: start network tx queue only when link is up
[linux-2.6-block.git] / net / netfilter / xt_HMARK.c
CommitLineData
cf308a1f
HS
1/*
2 * xt_HMARK - Netfilter module to set mark by means of hashing
3 *
4 * (C) 2012 by Hans Schillstrom <hans.schillstrom@ericsson.com>
5 * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 */
11
c08e5e1e
FW
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13
cf308a1f
HS
14#include <linux/module.h>
15#include <linux/skbuff.h>
16#include <linux/icmp.h>
17
18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/xt_HMARK.h>
20
21#include <net/ip.h>
22#if IS_ENABLED(CONFIG_NF_CONNTRACK)
23#include <net/netfilter/nf_conntrack.h>
24#endif
25#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
26#include <net/ipv6.h>
27#include <linux/netfilter_ipv6/ip6_tables.h>
28#endif
29
30MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Hans Schillstrom <hans.schillstrom@ericsson.com>");
32MODULE_DESCRIPTION("Xtables: packet marking using hash calculation");
33MODULE_ALIAS("ipt_HMARK");
34MODULE_ALIAS("ip6t_HMARK");
35
36struct hmark_tuple {
d1992b16
HS
37 __be32 src;
38 __be32 dst;
cf308a1f 39 union hmark_ports uports;
d1992b16 40 u8 proto;
cf308a1f
HS
41};
42
d1992b16 43static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
cf308a1f
HS
44{
45 return (addr32[0] & mask[0]) ^
46 (addr32[1] & mask[1]) ^
47 (addr32[2] & mask[2]) ^
48 (addr32[3] & mask[3]);
49}
50
d1992b16
HS
51static inline __be32
52hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
cf308a1f
HS
53{
54 switch (l3num) {
55 case AF_INET:
56 return *addr32 & *mask;
57 case AF_INET6:
58 return hmark_addr6_mask(addr32, mask);
59 }
60 return 0;
61}
62
d1992b16
HS
63static inline void hmark_swap_ports(union hmark_ports *uports,
64 const struct xt_hmark_info *info)
65{
66 union hmark_ports hp;
67 u16 src, dst;
68
69 hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
70 src = ntohs(hp.b16.src);
71 dst = ntohs(hp.b16.dst);
72
73 if (dst > src)
74 uports->v32 = (dst << 16) | src;
75 else
76 uports->v32 = (src << 16) | dst;
77}
78
cf308a1f
HS
79static int
80hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
81 const struct xt_hmark_info *info)
82{
83#if IS_ENABLED(CONFIG_NF_CONNTRACK)
84 enum ip_conntrack_info ctinfo;
85 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
86 struct nf_conntrack_tuple *otuple;
87 struct nf_conntrack_tuple *rtuple;
88
ab8bc7ed 89 if (ct == NULL)
cf308a1f
HS
90 return -1;
91
92 otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
93 rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
94
d1992b16
HS
95 t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
96 info->src_mask.ip6);
97 t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
98 info->dst_mask.ip6);
cf308a1f
HS
99
100 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
101 return 0;
102
103 t->proto = nf_ct_protonum(ct);
104 if (t->proto != IPPROTO_ICMP) {
d1992b16
HS
105 t->uports.b16.src = otuple->src.u.all;
106 t->uports.b16.dst = rtuple->src.u.all;
107 hmark_swap_ports(&t->uports, info);
cf308a1f
HS
108 }
109
110 return 0;
111#else
112 return -1;
113#endif
114}
115
d1992b16
HS
116/* This hash function is endian independent, to ensure consistent hashing if
117 * the cluster is composed of big and little endian systems. */
cf308a1f
HS
118static inline u32
119hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
120{
121 u32 hash;
d1992b16
HS
122 u32 src = ntohl(t->src);
123 u32 dst = ntohl(t->dst);
cf308a1f 124
d1992b16
HS
125 if (dst < src)
126 swap(src, dst);
cf308a1f 127
d1992b16 128 hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
cf308a1f
HS
129 hash = hash ^ (t->proto & info->proto_mask);
130
8fc54f68 131 return reciprocal_scale(hash, info->hmodulus) + info->hoffset;
cf308a1f
HS
132}
133
134static void
135hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
136 struct hmark_tuple *t, const struct xt_hmark_info *info)
137{
138 int protoff;
139
140 protoff = proto_ports_offset(t->proto);
141 if (protoff < 0)
142 return;
143
144 nhoff += protoff;
145 if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
146 return;
147
d1992b16 148 hmark_swap_ports(&t->uports, info);
cf308a1f
HS
149}
150
151#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
152static int get_inner6_hdr(const struct sk_buff *skb, int *offset)
153{
154 struct icmp6hdr *icmp6h, _ih6;
155
156 icmp6h = skb_header_pointer(skb, *offset, sizeof(_ih6), &_ih6);
157 if (icmp6h == NULL)
158 return 0;
159
160 if (icmp6h->icmp6_type && icmp6h->icmp6_type < 128) {
161 *offset += sizeof(struct icmp6hdr);
162 return 1;
163 }
164 return 0;
165}
166
167static int
168hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
169 const struct xt_hmark_info *info)
170{
171 struct ipv6hdr *ip6, _ip6;
f8f62675 172 int flag = IP6_FH_F_AUTH;
cf308a1f
HS
173 unsigned int nhoff = 0;
174 u16 fragoff = 0;
175 int nexthdr;
176
177 ip6 = (struct ipv6hdr *) (skb->data + skb_network_offset(skb));
178 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
179 if (nexthdr < 0)
180 return 0;
181 /* No need to check for icmp errors on fragments */
f8f62675 182 if ((flag & IP6_FH_F_FRAG) || (nexthdr != IPPROTO_ICMPV6))
cf308a1f
HS
183 goto noicmp;
184 /* Use inner header in case of ICMP errors */
185 if (get_inner6_hdr(skb, &nhoff)) {
186 ip6 = skb_header_pointer(skb, nhoff, sizeof(_ip6), &_ip6);
187 if (ip6 == NULL)
188 return -1;
189 /* If AH present, use SPI like in ESP. */
f8f62675 190 flag = IP6_FH_F_AUTH;
cf308a1f
HS
191 nexthdr = ipv6_find_hdr(skb, &nhoff, -1, &fragoff, &flag);
192 if (nexthdr < 0)
193 return -1;
194 }
195noicmp:
d1992b16
HS
196 t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
197 t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);
cf308a1f
HS
198
199 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
200 return 0;
201
202 t->proto = nexthdr;
203 if (t->proto == IPPROTO_ICMPV6)
204 return 0;
205
f8f62675 206 if (flag & IP6_FH_F_FRAG)
cf308a1f
HS
207 return 0;
208
209 hmark_set_tuple_ports(skb, nhoff, t, info);
210 return 0;
211}
212
213static unsigned int
214hmark_tg_v6(struct sk_buff *skb, const struct xt_action_param *par)
215{
216 const struct xt_hmark_info *info = par->targinfo;
217 struct hmark_tuple t;
218
219 memset(&t, 0, sizeof(struct hmark_tuple));
220
221 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
222 if (hmark_ct_set_htuple(skb, &t, info) < 0)
223 return XT_CONTINUE;
224 } else {
225 if (hmark_pkt_set_htuple_ipv6(skb, &t, info) < 0)
226 return XT_CONTINUE;
227 }
228
229 skb->mark = hmark_hash(&t, info);
230 return XT_CONTINUE;
231}
232#endif
233
234static int get_inner_hdr(const struct sk_buff *skb, int iphsz, int *nhoff)
235{
236 const struct icmphdr *icmph;
237 struct icmphdr _ih;
238
239 /* Not enough header? */
240 icmph = skb_header_pointer(skb, *nhoff + iphsz, sizeof(_ih), &_ih);
58618115 241 if (icmph == NULL || icmph->type > NR_ICMP_TYPES)
cf308a1f
HS
242 return 0;
243
244 /* Error message? */
245 if (icmph->type != ICMP_DEST_UNREACH &&
246 icmph->type != ICMP_SOURCE_QUENCH &&
247 icmph->type != ICMP_TIME_EXCEEDED &&
248 icmph->type != ICMP_PARAMETERPROB &&
249 icmph->type != ICMP_REDIRECT)
250 return 0;
251
252 *nhoff += iphsz + sizeof(_ih);
253 return 1;
254}
255
256static int
257hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
258 const struct xt_hmark_info *info)
259{
260 struct iphdr *ip, _ip;
261 int nhoff = skb_network_offset(skb);
262
263 ip = (struct iphdr *) (skb->data + nhoff);
264 if (ip->protocol == IPPROTO_ICMP) {
265 /* Use inner header in case of ICMP errors */
266 if (get_inner_hdr(skb, ip->ihl * 4, &nhoff)) {
267 ip = skb_header_pointer(skb, nhoff, sizeof(_ip), &_ip);
268 if (ip == NULL)
269 return -1;
270 }
271 }
272
d1992b16
HS
273 t->src = ip->saddr & info->src_mask.ip;
274 t->dst = ip->daddr & info->dst_mask.ip;
cf308a1f
HS
275
276 if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
277 return 0;
278
279 t->proto = ip->protocol;
280
281 /* ICMP has no ports, skip */
282 if (t->proto == IPPROTO_ICMP)
283 return 0;
284
285 /* follow-up fragments don't contain ports, skip all fragments */
286 if (ip->frag_off & htons(IP_MF | IP_OFFSET))
287 return 0;
288
289 hmark_set_tuple_ports(skb, (ip->ihl * 4) + nhoff, t, info);
290
291 return 0;
292}
293
294static unsigned int
295hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par)
296{
297 const struct xt_hmark_info *info = par->targinfo;
298 struct hmark_tuple t;
299
300 memset(&t, 0, sizeof(struct hmark_tuple));
301
302 if (info->flags & XT_HMARK_FLAG(XT_HMARK_CT)) {
303 if (hmark_ct_set_htuple(skb, &t, info) < 0)
304 return XT_CONTINUE;
305 } else {
306 if (hmark_pkt_set_htuple_ipv4(skb, &t, info) < 0)
307 return XT_CONTINUE;
308 }
309
310 skb->mark = hmark_hash(&t, info);
311 return XT_CONTINUE;
312}
313
314static int hmark_tg_check(const struct xt_tgchk_param *par)
315{
316 const struct xt_hmark_info *info = par->targinfo;
c08e5e1e 317 const char *errmsg = "proto mask must be zero with L3 mode";
cf308a1f 318
0cc9501f 319 if (!info->hmodulus)
cf308a1f 320 return -EINVAL;
0cc9501f 321
cf308a1f 322 if (info->proto_mask &&
c08e5e1e
FW
323 (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)))
324 goto err;
325
cf308a1f
HS
326 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) &&
327 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) |
0cc9501f 328 XT_HMARK_FLAG(XT_HMARK_DPORT_MASK))))
cf308a1f 329 return -EINVAL;
0cc9501f 330
cf308a1f
HS
331 if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) &&
332 (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) |
333 XT_HMARK_FLAG(XT_HMARK_DPORT)))) {
c08e5e1e
FW
334 errmsg = "spi-set and port-set can't be combined";
335 goto err;
cf308a1f
HS
336 }
337 return 0;
c08e5e1e
FW
338err:
339 pr_info_ratelimited("%s\n", errmsg);
340 return -EINVAL;
cf308a1f
HS
341}
342
343static struct xt_target hmark_tg_reg[] __read_mostly = {
344 {
345 .name = "HMARK",
346 .family = NFPROTO_IPV4,
347 .target = hmark_tg_v4,
348 .targetsize = sizeof(struct xt_hmark_info),
349 .checkentry = hmark_tg_check,
350 .me = THIS_MODULE,
351 },
352#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
353 {
354 .name = "HMARK",
355 .family = NFPROTO_IPV6,
356 .target = hmark_tg_v6,
357 .targetsize = sizeof(struct xt_hmark_info),
358 .checkentry = hmark_tg_check,
359 .me = THIS_MODULE,
360 },
361#endif
362};
363
364static int __init hmark_tg_init(void)
365{
366 return xt_register_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
367}
368
369static void __exit hmark_tg_exit(void)
370{
371 xt_unregister_targets(hmark_tg_reg, ARRAY_SIZE(hmark_tg_reg));
372}
373
374module_init(hmark_tg_init);
375module_exit(hmark_tg_exit);