Commit | Line | Data |
---|---|---|
09c434b8 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
a3c90f7a PNA |
2 | #include <linux/kernel.h> |
3 | #include <linux/module.h> | |
4 | #include <linux/init.h> | |
5 | #include <linux/netlink.h> | |
6 | #include <linux/netfilter.h> | |
7 | #include <linux/workqueue.h> | |
8 | #include <linux/spinlock.h> | |
40d102cd | 9 | #include <linux/netfilter/nf_conntrack_common.h> |
a3c90f7a PNA |
10 | #include <linux/netfilter/nf_tables.h> |
11 | #include <net/ip.h> /* for ipv4 options. */ | |
12 | #include <net/netfilter/nf_tables.h> | |
13 | #include <net/netfilter/nf_tables_core.h> | |
14 | #include <net/netfilter/nf_conntrack_core.h> | |
40d102cd | 15 | #include <net/netfilter/nf_conntrack_extend.h> |
a3c90f7a PNA |
16 | #include <net/netfilter/nf_flow_table.h> |
17 | ||
/* Private data of one flow_offload expression instance. */
struct nft_flow_offload {
	/* Flowtable looked up by name at init time; flows are added to it. */
	struct nft_flowtable	*flowtable;
};
21 | ||
22 | static int nft_flow_route(const struct nft_pktinfo *pkt, | |
23 | const struct nf_conn *ct, | |
24 | struct nf_flow_route *route, | |
25 | enum ip_conntrack_dir dir) | |
26 | { | |
27 | struct dst_entry *this_dst = skb_dst(pkt->skb); | |
28 | struct dst_entry *other_dst = NULL; | |
29 | struct flowi fl; | |
30 | ||
31 | memset(&fl, 0, sizeof(fl)); | |
32 | switch (nft_pf(pkt)) { | |
33 | case NFPROTO_IPV4: | |
a799aea0 | 34 | fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip; |
10f4e765 | 35 | fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex; |
a3c90f7a PNA |
36 | break; |
37 | case NFPROTO_IPV6: | |
a799aea0 | 38 | fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6; |
10f4e765 | 39 | fl.u.ip6.flowi6_oif = nft_in(pkt)->ifindex; |
a3c90f7a PNA |
40 | break; |
41 | } | |
42 | ||
43 | nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt)); | |
44 | if (!other_dst) | |
45 | return -ENOENT; | |
46 | ||
47 | route->tuple[dir].dst = this_dst; | |
a3c90f7a | 48 | route->tuple[!dir].dst = other_dst; |
a3c90f7a PNA |
49 | |
50 | return 0; | |
51 | } | |
52 | ||
69aeb538 | 53 | static bool nft_flow_offload_skip(struct sk_buff *skb, int family) |
a3c90f7a | 54 | { |
a3c90f7a PNA |
55 | if (skb_sec_path(skb)) |
56 | return true; | |
57 | ||
69aeb538 FW |
58 | if (family == NFPROTO_IPV4) { |
59 | const struct ip_options *opt; | |
60 | ||
61 | opt = &(IPCB(skb)->opt); | |
62 | ||
63 | if (unlikely(opt->optlen)) | |
64 | return true; | |
65 | } | |
66 | ||
a3c90f7a PNA |
67 | return false; |
68 | } | |
69 | ||
70 | static void nft_flow_offload_eval(const struct nft_expr *expr, | |
71 | struct nft_regs *regs, | |
72 | const struct nft_pktinfo *pkt) | |
73 | { | |
74 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
75 | struct nf_flowtable *flowtable = &priv->flowtable->data; | |
dfe42be1 | 76 | struct tcphdr _tcph, *tcph = NULL; |
a3c90f7a PNA |
77 | enum ip_conntrack_info ctinfo; |
78 | struct nf_flow_route route; | |
79 | struct flow_offload *flow; | |
80 | enum ip_conntrack_dir dir; | |
81 | struct nf_conn *ct; | |
82 | int ret; | |
83 | ||
69aeb538 | 84 | if (nft_flow_offload_skip(pkt->skb, nft_pf(pkt))) |
a3c90f7a PNA |
85 | goto out; |
86 | ||
87 | ct = nf_ct_get(pkt->skb, &ctinfo); | |
88 | if (!ct) | |
89 | goto out; | |
90 | ||
91 | switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) { | |
92 | case IPPROTO_TCP: | |
dfe42be1 PNA |
93 | tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, |
94 | sizeof(_tcph), &_tcph); | |
95 | if (unlikely(!tcph || tcph->fin || tcph->rst)) | |
96 | goto out; | |
8437a620 | 97 | break; |
a3c90f7a PNA |
98 | case IPPROTO_UDP: |
99 | break; | |
100 | default: | |
101 | goto out; | |
102 | } | |
103 | ||
91a9048f | 104 | if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) || |
c4617214 | 105 | ct->status & (IPS_SEQ_ADJUST | IPS_NAT_CLASH)) |
a3c90f7a PNA |
106 | goto out; |
107 | ||
270a8a29 | 108 | if (!nf_ct_is_confirmed(ct)) |
a3c90f7a PNA |
109 | goto out; |
110 | ||
111 | if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status)) | |
112 | goto out; | |
113 | ||
114 | dir = CTINFO2DIR(ctinfo); | |
115 | if (nft_flow_route(pkt, ct, &route, dir) < 0) | |
116 | goto err_flow_route; | |
117 | ||
f1363e05 | 118 | flow = flow_offload_alloc(ct); |
a3c90f7a PNA |
119 | if (!flow) |
120 | goto err_flow_alloc; | |
121 | ||
f1363e05 PNA |
122 | if (flow_offload_route_init(flow, &route) < 0) |
123 | goto err_flow_add; | |
124 | ||
dfe42be1 | 125 | if (tcph) { |
8437a620 FW |
126 | ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; |
127 | ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL; | |
128 | } | |
129 | ||
a3c90f7a PNA |
130 | ret = flow_offload_add(flowtable, flow); |
131 | if (ret < 0) | |
132 | goto err_flow_add; | |
133 | ||
26a302af | 134 | dst_release(route.tuple[!dir].dst); |
a3c90f7a PNA |
135 | return; |
136 | ||
137 | err_flow_add: | |
138 | flow_offload_free(flow); | |
139 | err_flow_alloc: | |
140 | dst_release(route.tuple[!dir].dst); | |
141 | err_flow_route: | |
142 | clear_bit(IPS_OFFLOAD_BIT, &ct->status); | |
143 | out: | |
144 | regs->verdict.code = NFT_BREAK; | |
145 | } | |
146 | ||
147 | static int nft_flow_offload_validate(const struct nft_ctx *ctx, | |
148 | const struct nft_expr *expr, | |
149 | const struct nft_data **data) | |
150 | { | |
151 | unsigned int hook_mask = (1 << NF_INET_FORWARD); | |
152 | ||
153 | return nft_chain_validate_hooks(ctx->chain, hook_mask); | |
154 | } | |
155 | ||
14c41586 PNA |
156 | static const struct nla_policy nft_flow_offload_policy[NFTA_FLOW_MAX + 1] = { |
157 | [NFTA_FLOW_TABLE_NAME] = { .type = NLA_STRING, | |
158 | .len = NFT_NAME_MAXLEN - 1 }, | |
159 | }; | |
160 | ||
a3c90f7a PNA |
161 | static int nft_flow_offload_init(const struct nft_ctx *ctx, |
162 | const struct nft_expr *expr, | |
163 | const struct nlattr * const tb[]) | |
164 | { | |
165 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
166 | u8 genmask = nft_genmask_next(ctx->net); | |
167 | struct nft_flowtable *flowtable; | |
168 | ||
169 | if (!tb[NFTA_FLOW_TABLE_NAME]) | |
170 | return -EINVAL; | |
171 | ||
cac20fcd PNA |
172 | flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME], |
173 | genmask); | |
a3c90f7a PNA |
174 | if (IS_ERR(flowtable)) |
175 | return PTR_ERR(flowtable); | |
176 | ||
177 | priv->flowtable = flowtable; | |
178 | flowtable->use++; | |
179 | ||
36596dad | 180 | return nf_ct_netns_get(ctx->net, ctx->family); |
a3c90f7a PNA |
181 | } |
182 | ||
9b05b6e1 LGL |
183 | static void nft_flow_offload_deactivate(const struct nft_ctx *ctx, |
184 | const struct nft_expr *expr, | |
185 | enum nft_trans_phase phase) | |
186 | { | |
187 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
188 | ||
189 | nf_tables_deactivate_flowtable(ctx, priv->flowtable, phase); | |
190 | } | |
191 | ||
192 | static void nft_flow_offload_activate(const struct nft_ctx *ctx, | |
193 | const struct nft_expr *expr) | |
194 | { | |
195 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
196 | ||
197 | priv->flowtable->use++; | |
198 | } | |
199 | ||
a3c90f7a PNA |
200 | static void nft_flow_offload_destroy(const struct nft_ctx *ctx, |
201 | const struct nft_expr *expr) | |
202 | { | |
36596dad | 203 | nf_ct_netns_put(ctx->net, ctx->family); |
a3c90f7a PNA |
204 | } |
205 | ||
206 | static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr) | |
207 | { | |
208 | struct nft_flow_offload *priv = nft_expr_priv(expr); | |
209 | ||
210 | if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name)) | |
211 | goto nla_put_failure; | |
212 | ||
213 | return 0; | |
214 | ||
215 | nla_put_failure: | |
216 | return -1; | |
217 | } | |
218 | ||
219 | static struct nft_expr_type nft_flow_offload_type; | |
220 | static const struct nft_expr_ops nft_flow_offload_ops = { | |
221 | .type = &nft_flow_offload_type, | |
222 | .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)), | |
223 | .eval = nft_flow_offload_eval, | |
224 | .init = nft_flow_offload_init, | |
9b05b6e1 LGL |
225 | .activate = nft_flow_offload_activate, |
226 | .deactivate = nft_flow_offload_deactivate, | |
a3c90f7a PNA |
227 | .destroy = nft_flow_offload_destroy, |
228 | .validate = nft_flow_offload_validate, | |
229 | .dump = nft_flow_offload_dump, | |
230 | }; | |
231 | ||
232 | static struct nft_expr_type nft_flow_offload_type __read_mostly = { | |
233 | .name = "flow_offload", | |
234 | .ops = &nft_flow_offload_ops, | |
14c41586 | 235 | .policy = nft_flow_offload_policy, |
a3c90f7a PNA |
236 | .maxattr = NFTA_FLOW_MAX, |
237 | .owner = THIS_MODULE, | |
238 | }; | |
239 | ||
a3c90f7a PNA |
240 | static int flow_offload_netdev_event(struct notifier_block *this, |
241 | unsigned long event, void *ptr) | |
242 | { | |
243 | struct net_device *dev = netdev_notifier_info_to_dev(ptr); | |
244 | ||
245 | if (event != NETDEV_DOWN) | |
246 | return NOTIFY_DONE; | |
247 | ||
5f1be84a | 248 | nf_flow_table_cleanup(dev); |
a3c90f7a PNA |
249 | |
250 | return NOTIFY_DONE; | |
251 | } | |
252 | ||
253 | static struct notifier_block flow_offload_netdev_notifier = { | |
254 | .notifier_call = flow_offload_netdev_event, | |
255 | }; | |
256 | ||
257 | static int __init nft_flow_offload_module_init(void) | |
258 | { | |
259 | int err; | |
260 | ||
584eab29 TY |
261 | err = register_netdevice_notifier(&flow_offload_netdev_notifier); |
262 | if (err) | |
263 | goto err; | |
a3c90f7a PNA |
264 | |
265 | err = nft_register_expr(&nft_flow_offload_type); | |
266 | if (err < 0) | |
267 | goto register_expr; | |
268 | ||
269 | return 0; | |
270 | ||
271 | register_expr: | |
272 | unregister_netdevice_notifier(&flow_offload_netdev_notifier); | |
584eab29 | 273 | err: |
a3c90f7a PNA |
274 | return err; |
275 | } | |
276 | ||
277 | static void __exit nft_flow_offload_module_exit(void) | |
278 | { | |
a3c90f7a PNA |
279 | nft_unregister_expr(&nft_flow_offload_type); |
280 | unregister_netdevice_notifier(&flow_offload_netdev_notifier); | |
a3c90f7a PNA |
281 | } |
282 | ||
283 | module_init(nft_flow_offload_module_init); | |
284 | module_exit(nft_flow_offload_module_exit); | |
285 | ||
286 | MODULE_LICENSE("GPL"); | |
287 | MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); | |
288 | MODULE_ALIAS_NFT_EXPR("flow_offload"); | |
4cacc395 | 289 | MODULE_DESCRIPTION("nftables hardware flow offload module"); |