/* net/netfilter/nf_flow_table_offload.c */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netfilter.h>
#include <linux/rhashtable.h>
#include <linux/netdevice.h>
#include <linux/tc_act/tc_csum.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>

/* Offload requests are queued on flow_offload_pending_list and serviced
 * asynchronously from nf_flow_offload_work.
 */
static struct work_struct nf_flow_offload_work;
static DEFINE_SPINLOCK(flow_offload_pending_list_lock);
static LIST_HEAD(flow_offload_pending_list);

struct flow_offload_work {
        struct list_head        list;
        enum flow_cls_command   cmd;
        int                     priority;
        struct nf_flowtable     *flowtable;
        struct flow_offload     *flow;
};

struct nf_flow_key {
        struct flow_dissector_key_meta                  meta;
        struct flow_dissector_key_control               control;
        struct flow_dissector_key_basic                 basic;
        union {
                struct flow_dissector_key_ipv4_addrs    ipv4;
                struct flow_dissector_key_ipv6_addrs    ipv6;
        };
        struct flow_dissector_key_tcp                   tcp;
        struct flow_dissector_key_ports                 tp;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */

struct nf_flow_match {
        struct flow_dissector   dissector;
        struct nf_flow_key      key;
        struct nf_flow_key      mask;
};

struct nf_flow_rule {
        struct nf_flow_match    match;
        struct flow_rule        *rule;
};

#define NF_FLOW_DISSECTOR(__match, __type, __field)     \
        (__match)->dissector.offset[__type] =           \
                offsetof(struct nf_flow_key, __field)

/* Populate the flow dissector key and mask that describe this flow tuple. */
static int nf_flow_rule_match(struct nf_flow_match *match,
                              const struct flow_offload_tuple *tuple)
{
        struct nf_flow_key *mask = &match->mask;
        struct nf_flow_key *key = &match->key;

        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
        NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);

        key->meta.ingress_ifindex = tuple->iifidx;
        mask->meta.ingress_ifindex = 0xffffffff;

        switch (tuple->l3proto) {
        case AF_INET:
                key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                key->basic.n_proto = htons(ETH_P_IP);
                key->ipv4.src = tuple->src_v4.s_addr;
                mask->ipv4.src = 0xffffffff;
                key->ipv4.dst = tuple->dst_v4.s_addr;
                mask->ipv4.dst = 0xffffffff;
                break;
        case AF_INET6:
                key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                key->basic.n_proto = htons(ETH_P_IPV6);
                key->ipv6.src = tuple->src_v6;
                memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
                key->ipv6.dst = tuple->dst_v6;
                memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
                break;
        default:
                return -EOPNOTSUPP;
        }
        mask->control.addr_type = 0xffff;
        match->dissector.used_keys |= BIT(key->control.addr_type);
        mask->basic.n_proto = 0xffff;

        switch (tuple->l4proto) {
        case IPPROTO_TCP:
                key->tcp.flags = 0;
                mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
                match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
                break;
        case IPPROTO_UDP:
                break;
        default:
                return -EOPNOTSUPP;
        }

        key->basic.ip_proto = tuple->l4proto;
        mask->basic.ip_proto = 0xff;

        key->tp.src = tuple->src_port;
        mask->tp.src = 0xffff;
        key->tp.dst = tuple->dst_port;
        mask->tp.dst = 0xffff;

        match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
                                      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
                                      BIT(FLOW_DISSECTOR_KEY_BASIC) |
                                      BIT(FLOW_DISSECTOR_KEY_PORTS);
        return 0;
}

static void flow_offload_mangle(struct flow_action_entry *entry,
                                enum flow_action_mangle_base htype, u32 offset,
                                const __be32 *value, const __be32 *mask)
{
        entry->id = FLOW_ACTION_MANGLE;
        entry->mangle.htype = htype;
        entry->mangle.offset = offset;
        memcpy(&entry->mangle.mask, mask, sizeof(u32));
        memcpy(&entry->mangle.val, value, sizeof(u32));
}

static inline struct flow_action_entry *
flow_action_entry_next(struct nf_flow_rule *flow_rule)
{
        int i = flow_rule->rule->action.num_entries++;

        return &flow_rule->rule->action.entries[i];
}

/* Set the Ethernet source MAC to the address of the output device, i.e. the
 * input device of the reverse tuple.
 */
static int flow_offload_eth_src(struct net *net,
                                const struct flow_offload *flow,
                                enum flow_offload_tuple_dir dir,
                                struct nf_flow_rule *flow_rule)
{
        const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
        struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
        struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
        struct net_device *dev;
        u32 mask, val;
        u16 val16;

        dev = dev_get_by_index(net, tuple->iifidx);
        if (!dev)
                return -ENOENT;

        mask = ~0xffff0000;
        memcpy(&val16, dev->dev_addr, 2);
        val = val16 << 16;
        flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
                            &val, &mask);

        mask = ~0xffffffff;
        memcpy(&val, dev->dev_addr + 2, 4);
        flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
                            &val, &mask);
        dev_put(dev);

        return 0;
}

/* Set the Ethernet destination MAC to the next hop hardware address taken
 * from the neighbour cache.
 */
static int flow_offload_eth_dst(struct net *net,
                                const struct flow_offload *flow,
                                enum flow_offload_tuple_dir dir,
                                struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
        struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
        const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
        const struct dst_entry *dst_cache;
        unsigned char ha[ETH_ALEN];
        struct neighbour *n;
        u32 mask, val;
        u8 nud_state;
        u16 val16;

        dst_cache = flow->tuplehash[dir].tuple.dst_cache;
        n = dst_neigh_lookup(dst_cache, daddr);
        if (!n)
                return -ENOENT;

        read_lock_bh(&n->lock);
        nud_state = n->nud_state;
        ether_addr_copy(ha, n->ha);
        read_unlock_bh(&n->lock);

        if (!(nud_state & NUD_VALID)) {
                neigh_release(n);
                return -ENOENT;
        }

        mask = ~0xffffffff;
        memcpy(&val, ha, 4);
        flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
                            &val, &mask);

        mask = ~0x0000ffff;
        memcpy(&val16, ha + 4, 2);
        val = val16;
        flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
                            &val, &mask);
        neigh_release(n);

        return 0;
}

static void flow_offload_ipv4_snat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        u32 mask = ~htonl(0xffffffff);
        __be32 addr;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
                offset = offsetof(struct iphdr, saddr);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
                offset = offsetof(struct iphdr, daddr);
                break;
        default:
                return;
        }

        flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
                            &addr, &mask);
}

static void flow_offload_ipv4_dnat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        u32 mask = ~htonl(0xffffffff);
        __be32 addr;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
                offset = offsetof(struct iphdr, daddr);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
                offset = offsetof(struct iphdr, saddr);
                break;
        default:
                return;
        }

        flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
                            &addr, &mask);
}

/* Rewrite the IPv6 address as four consecutive 32-bit words, one mangle
 * action per word. The original loop stepped the index by sizeof(u32) and
 * therefore only mangled the first word; iterate over all four words.
 */
static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
                                     unsigned int offset,
                                     const __be32 *addr, const __be32 *mask)
{
        struct flow_action_entry *entry;
        int i;

        for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
                entry = flow_action_entry_next(flow_rule);
                flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
                                    offset + i * sizeof(u32), &addr[i], mask);
        }
}

static void flow_offload_ipv6_snat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        u32 mask = ~htonl(0xffffffff);
        const __be32 *addr;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
                offset = offsetof(struct ipv6hdr, saddr);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
                offset = offsetof(struct ipv6hdr, daddr);
                break;
        default:
                return;
        }

        flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static void flow_offload_ipv6_dnat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        u32 mask = ~htonl(0xffffffff);
        const __be32 *addr;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
                offset = offsetof(struct ipv6hdr, daddr);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
                offset = offsetof(struct ipv6hdr, saddr);
                break;
        default:
                return;
        }

        flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}

static int flow_offload_l4proto(const struct flow_offload *flow)
{
        u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
        u8 type = 0;

        switch (protonum) {
        case IPPROTO_TCP:
                type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
                break;
        case IPPROTO_UDP:
                type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
                break;
        default:
                break;
        }

        return type;
}

static void flow_offload_port_snat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        u32 mask, port;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
                offset = 0; /* offsetof(struct tcphdr, source); */
                port = htonl(port << 16);
                mask = ~htonl(0xffff0000);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
                offset = 0; /* offsetof(struct tcphdr, dest); */
                port = htonl(port);
                mask = ~htonl(0xffff);
                break;
        default:
                return;
        }

        flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
                            &port, &mask);
}

static void flow_offload_port_dnat(struct net *net,
                                   const struct flow_offload *flow,
                                   enum flow_offload_tuple_dir dir,
                                   struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        u32 mask, port;
        u32 offset;

        switch (dir) {
        case FLOW_OFFLOAD_DIR_ORIGINAL:
                port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
                offset = 0; /* offsetof(struct tcphdr, dest); */
                port = htonl(port);
                mask = ~htonl(0xffff);
                break;
        case FLOW_OFFLOAD_DIR_REPLY:
                port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
                offset = 0; /* offsetof(struct tcphdr, source); */
                port = htonl(port << 16);
                mask = ~htonl(0xffff0000);
                break;
        default:
                return;
        }

        flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
                            &port, &mask);
}

static void flow_offload_ipv4_checksum(struct net *net,
                                       const struct flow_offload *flow,
                                       struct nf_flow_rule *flow_rule)
{
        u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);

        entry->id = FLOW_ACTION_CSUM;
        entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;

        switch (protonum) {
        case IPPROTO_TCP:
                entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
                break;
        case IPPROTO_UDP:
                entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
                break;
        }
}

static void flow_offload_redirect(const struct flow_offload *flow,
                                  enum flow_offload_tuple_dir dir,
                                  struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
        struct rtable *rt;

        rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
        entry->id = FLOW_ACTION_REDIRECT;
        entry->dev = rt->dst.dev;
        dev_hold(rt->dst.dev);
}

int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
                            enum flow_offload_tuple_dir dir,
                            struct nf_flow_rule *flow_rule)
{
        if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
            flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
                return -1;

        if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
                flow_offload_ipv4_snat(net, flow, dir, flow_rule);
                flow_offload_port_snat(net, flow, dir, flow_rule);
        }
        if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
                flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
                flow_offload_port_dnat(net, flow, dir, flow_rule);
        }
        if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
            test_bit(NF_FLOW_DNAT, &flow->flags))
                flow_offload_ipv4_checksum(net, flow, flow_rule);

        flow_offload_redirect(flow, dir, flow_rule);

        return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);

int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
                            enum flow_offload_tuple_dir dir,
                            struct nf_flow_rule *flow_rule)
{
        if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
            flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
                return -1;

        if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
                flow_offload_ipv6_snat(net, flow, dir, flow_rule);
                flow_offload_port_snat(net, flow, dir, flow_rule);
        }
        if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
                flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
                flow_offload_port_dnat(net, flow, dir, flow_rule);
        }

        flow_offload_redirect(flow, dir, flow_rule);

        return 0;
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);

#define NF_FLOW_RULE_ACTION_MAX 16

static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
                           const struct flow_offload_work *offload,
                           enum flow_offload_tuple_dir dir)
{
        const struct nf_flowtable *flowtable = offload->flowtable;
        const struct flow_offload *flow = offload->flow;
        const struct flow_offload_tuple *tuple;
        struct nf_flow_rule *flow_rule;
        int err = -ENOMEM;

        flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
        if (!flow_rule)
                goto err_flow;

        flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
        if (!flow_rule->rule)
                goto err_flow_rule;

        flow_rule->rule->match.dissector = &flow_rule->match.dissector;
        flow_rule->rule->match.mask = &flow_rule->match.mask;
        flow_rule->rule->match.key = &flow_rule->match.key;

        tuple = &flow->tuplehash[dir].tuple;
        err = nf_flow_rule_match(&flow_rule->match, tuple);
        if (err < 0)
                goto err_flow_match;

        flow_rule->rule->action.num_entries = 0;
        if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
                goto err_flow_match;

        return flow_rule;

err_flow_match:
        kfree(flow_rule->rule);
err_flow_rule:
        kfree(flow_rule);
err_flow:
        return NULL;
}

static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
{
        struct flow_action_entry *entry;
        int i;

        for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
                entry = &flow_rule->rule->action.entries[i];
                if (entry->id != FLOW_ACTION_REDIRECT)
                        continue;

                dev_put(entry->dev);
        }
        kfree(flow_rule->rule);
        kfree(flow_rule);
}

static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
{
        int i;

        for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
                __nf_flow_offload_destroy(flow_rule[i]);
}

static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
                                 struct nf_flow_rule *flow_rule[])
{
        struct net *net = read_pnet(&offload->flowtable->net);

        flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
                                                  FLOW_OFFLOAD_DIR_ORIGINAL);
        if (!flow_rule[0])
                return -ENOMEM;

        flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
                                                  FLOW_OFFLOAD_DIR_REPLY);
        if (!flow_rule[1]) {
                __nf_flow_offload_destroy(flow_rule[0]);
                return -ENOMEM;
        }

        return 0;
}

static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
                                 __be16 proto, int priority,
                                 enum flow_cls_command cmd,
                                 const struct flow_offload_tuple *tuple,
                                 struct netlink_ext_ack *extack)
{
        cls_flow->common.protocol = proto;
        cls_flow->common.prio = priority;
        cls_flow->common.extack = extack;
        cls_flow->command = cmd;
        cls_flow->cookie = (unsigned long)tuple;
}

static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
                                 struct flow_offload *flow,
                                 struct nf_flow_rule *flow_rule,
                                 enum flow_offload_tuple_dir dir,
                                 int priority, int cmd,
                                 struct list_head *block_cb_list)
{
        struct flow_cls_offload cls_flow = {};
        struct flow_block_cb *block_cb;
        struct netlink_ext_ack extack;
        __be16 proto = ETH_P_ALL;
        int err, i = 0;

        nf_flow_offload_init(&cls_flow, proto, priority, cmd,
                             &flow->tuplehash[dir].tuple, &extack);
        if (cmd == FLOW_CLS_REPLACE)
                cls_flow.rule = flow_rule->rule;

        list_for_each_entry(block_cb, block_cb_list, list) {
                err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
                                   block_cb->cb_priv);
                if (err < 0)
                        continue;

                i++;
        }

        return i;
}

static int flow_offload_tuple_add(struct flow_offload_work *offload,
                                  struct nf_flow_rule *flow_rule,
                                  enum flow_offload_tuple_dir dir)
{
        return nf_flow_offload_tuple(offload->flowtable, offload->flow,
                                     flow_rule, dir, offload->priority,
                                     FLOW_CLS_REPLACE,
                                     &offload->flowtable->flow_block.cb_list);
}

static void flow_offload_tuple_del(struct flow_offload_work *offload,
                                   enum flow_offload_tuple_dir dir)
{
        nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
                              offload->priority, FLOW_CLS_DESTROY,
                              &offload->flowtable->flow_block.cb_list);
}

static int flow_offload_rule_add(struct flow_offload_work *offload,
                                 struct nf_flow_rule *flow_rule[])
{
        int ok_count = 0;

        ok_count += flow_offload_tuple_add(offload, flow_rule[0],
                                           FLOW_OFFLOAD_DIR_ORIGINAL);
        ok_count += flow_offload_tuple_add(offload, flow_rule[1],
                                           FLOW_OFFLOAD_DIR_REPLY);
        if (ok_count == 0)
                return -ENOENT;

        return 0;
}

static void flow_offload_work_add(struct flow_offload_work *offload)
{
        struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
        int err;

        err = nf_flow_offload_alloc(offload, flow_rule);
        if (err < 0)
                return;

        err = flow_offload_rule_add(offload, flow_rule);
        if (err < 0)
                set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);

        nf_flow_offload_destroy(flow_rule);
}

static void flow_offload_work_del(struct flow_offload_work *offload)
{
        flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
        flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
        set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}

static void flow_offload_tuple_stats(struct flow_offload_work *offload,
                                     enum flow_offload_tuple_dir dir,
                                     struct flow_stats *stats)
{
        struct nf_flowtable *flowtable = offload->flowtable;
        struct flow_cls_offload cls_flow = {};
        struct flow_block_cb *block_cb;
        struct netlink_ext_ack extack;
        __be16 proto = ETH_P_ALL;

        nf_flow_offload_init(&cls_flow, proto, offload->priority,
                             FLOW_CLS_STATS,
                             &offload->flow->tuplehash[dir].tuple, &extack);

        list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
                block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
        memcpy(stats, &cls_flow.stats, sizeof(*stats));
}

static void flow_offload_work_stats(struct flow_offload_work *offload)
{
        struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
        u64 lastused;

        flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
        flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);

        lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
        offload->flow->timeout = max_t(u64, offload->flow->timeout,
                                       lastused + NF_FLOW_TIMEOUT);
}

/* Work handler: drain the pending list and run each queued add, delete or
 * stats request against the flowtable's block callbacks.
 */
static void flow_offload_work_handler(struct work_struct *work)
{
        struct flow_offload_work *offload, *next;
        LIST_HEAD(offload_pending_list);

        spin_lock_bh(&flow_offload_pending_list_lock);
        list_replace_init(&flow_offload_pending_list, &offload_pending_list);
        spin_unlock_bh(&flow_offload_pending_list_lock);

        list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
                switch (offload->cmd) {
                case FLOW_CLS_REPLACE:
                        flow_offload_work_add(offload);
                        break;
                case FLOW_CLS_DESTROY:
                        flow_offload_work_del(offload);
                        break;
                case FLOW_CLS_STATS:
                        flow_offload_work_stats(offload);
                        break;
                default:
                        WARN_ON_ONCE(1);
                }
                list_del(&offload->list);
                kfree(offload);
        }
}

static void flow_offload_queue_work(struct flow_offload_work *offload)
{
        spin_lock_bh(&flow_offload_pending_list_lock);
        list_add_tail(&offload->list, &flow_offload_pending_list);
        spin_unlock_bh(&flow_offload_pending_list_lock);

        schedule_work(&nf_flow_offload_work);
}

static struct flow_offload_work *
nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
                           struct flow_offload *flow, unsigned int cmd)
{
        struct flow_offload_work *offload;

        offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
        if (!offload)
                return NULL;

        offload->cmd = cmd;
        offload->flow = flow;
        offload->priority = flowtable->priority;
        offload->flowtable = flowtable;

        return offload;
}

void nf_flow_offload_add(struct nf_flowtable *flowtable,
                         struct flow_offload *flow)
{
        struct flow_offload_work *offload;

        offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
        if (!offload)
                return;

        flow_offload_queue_work(offload);
}

void nf_flow_offload_del(struct nf_flowtable *flowtable,
                         struct flow_offload *flow)
{
        struct flow_offload_work *offload;

        offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
        if (!offload)
                return;

        set_bit(NF_FLOW_HW_DYING, &flow->flags);
        flow_offload_queue_work(offload);
}

void nf_flow_offload_stats(struct nf_flowtable *flowtable,
                           struct flow_offload *flow)
{
        struct flow_offload_work *offload;
        __s32 delta;

        delta = nf_flow_timeout_delta(flow->timeout);
        if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10))
                return;

        offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
        if (!offload)
                return;

        flow_offload_queue_work(offload);
}

void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
{
        if (nf_flowtable_hw_offload(flowtable))
                flush_work(&nf_flow_offload_work);
}

static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
                                     struct flow_block_offload *bo,
                                     enum flow_block_command cmd)
{
        struct flow_block_cb *block_cb, *next;
        int err = 0;

        switch (cmd) {
        case FLOW_BLOCK_BIND:
                list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
                break;
        case FLOW_BLOCK_UNBIND:
                list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
                        list_del(&block_cb->list);
                        flow_block_cb_free(block_cb);
                }
                break;
        default:
                WARN_ON_ONCE(1);
                err = -EOPNOTSUPP;
        }

        return err;
}

static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
                                     struct nf_flowtable *flowtable,
                                     struct net_device *dev,
                                     enum flow_block_command cmd,
                                     struct netlink_ext_ack *extack)
{
        int err;

        if (!dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;

        memset(bo, 0, sizeof(*bo));
        bo->net         = dev_net(dev);
        bo->block       = &flowtable->flow_block;
        bo->command     = cmd;
        bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
        bo->extack      = extack;
        INIT_LIST_HEAD(&bo->cb_list);

        err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
        if (err < 0)
                return err;

        return 0;
}

int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
                                struct net_device *dev,
                                enum flow_block_command cmd)
{
        struct netlink_ext_ack extack = {};
        struct flow_block_offload bo;
        int err;

        if (!nf_flowtable_hw_offload(flowtable))
                return 0;

        err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd, &extack);
        if (err < 0)
                return err;

        return nf_flow_table_block_setup(flowtable, &bo, cmd);
}
EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);

int nf_flow_table_offload_init(void)
{
        INIT_WORK(&nf_flow_offload_work, flow_offload_work_handler);

        return 0;
}

void nf_flow_table_offload_exit(void)
{
        struct flow_offload_work *offload, *next;
        LIST_HEAD(offload_pending_list);

        cancel_work_sync(&nf_flow_offload_work);

        list_for_each_entry_safe(offload, next, &offload_pending_list, list) {
                list_del(&offload->list);
                kfree(offload);
        }
}