/* net: sched: ensure tc flower reoffload takes filter ref
 * [linux-block.git] / net / sched / cls_flower.c
 */
1 /*
2  * net/sched/cls_flower.c               Flower classifier
3  *
4  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/init.h>
14 #include <linux/module.h>
15 #include <linux/rhashtable.h>
16 #include <linux/workqueue.h>
17 #include <linux/refcount.h>
18
19 #include <linux/if_ether.h>
20 #include <linux/in6.h>
21 #include <linux/ip.h>
22 #include <linux/mpls.h>
23
24 #include <net/sch_generic.h>
25 #include <net/pkt_cls.h>
26 #include <net/ip.h>
27 #include <net/flow_dissector.h>
28 #include <net/geneve.h>
29
30 #include <net/dst.h>
31 #include <net/dst_metadata.h>
32
/* Aggregate of every flow-dissector key flower can match on.  Masked-key
 * operations walk this struct as an array of longs (see fl_set_masked_key()),
 * hence the __aligned() below.
 */
struct fl_flow_key {
        int     indev_ifindex;
        struct flow_dissector_key_control control;
        struct flow_dissector_key_control enc_control;
        struct flow_dissector_key_basic basic;
        struct flow_dissector_key_eth_addrs eth;
        struct flow_dissector_key_vlan vlan;
        struct flow_dissector_key_vlan cvlan;
        union {         /* outer L3 addresses: IPv4 or IPv6, never both */
                struct flow_dissector_key_ipv4_addrs ipv4;
                struct flow_dissector_key_ipv6_addrs ipv6;
        };
        struct flow_dissector_key_ports tp;
        struct flow_dissector_key_icmp icmp;
        struct flow_dissector_key_arp arp;
        struct flow_dissector_key_keyid enc_key_id;
        union {         /* tunnel (encap) addresses: IPv4 or IPv6 */
                struct flow_dissector_key_ipv4_addrs enc_ipv4;
                struct flow_dissector_key_ipv6_addrs enc_ipv6;
        };
        struct flow_dissector_key_ports enc_tp;
        struct flow_dissector_key_mpls mpls;
        struct flow_dissector_key_tcp tcp;
        struct flow_dissector_key_ip ip;
        struct flow_dissector_key_ip enc_ip;
        struct flow_dissector_key_enc_opts enc_opts;
        struct flow_dissector_key_ports tp_min;         /* port-range matching */
        struct flow_dissector_key_ports tp_max;         /* (see fl_lookup_range()) */
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
62
/* Byte span [start, end) of fl_flow_key that holds any non-zero mask bits;
 * key comparisons are restricted to it (computed by fl_mask_update_range()).
 */
struct fl_flow_mask_range {
        unsigned short int start;
        unsigned short int end;
};
67
/* A mask shared by all filters with identical mask bits, together with the
 * hashtable of those filters.  Masks themselves are deduplicated in
 * cls_fl_head->ht and refcounted per attached filter.
 */
struct fl_flow_mask {
        struct fl_flow_key key;                 /* the mask bits themselves */
        struct fl_flow_mask_range range;        /* non-zero span of @key */
        u32 flags;                              /* e.g. TCA_FLOWER_MASK_FLAGS_RANGE */
        struct rhash_head ht_node;              /* node in cls_fl_head->ht */
        struct rhashtable ht;                   /* filters keyed by masked key */
        struct rhashtable_params filter_ht_params;
        struct flow_dissector dissector;
        struct list_head filters;               /* all filters using this mask */
        struct rcu_work rwork;                  /* deferred free (fl_mask_free_work) */
        struct list_head list;                  /* node in cls_fl_head->masks */
        refcount_t refcnt;                      /* one per attached filter */
};
81
/* Chain template: an upper bound on the masks filters may use on this chain
 * (enforced by fl_mask_fits_tmplt()).
 */
struct fl_flow_tmplt {
        struct fl_flow_key dummy_key;
        struct fl_flow_key mask;
        struct flow_dissector dissector;
        struct tcf_chain *chain;
};
88
/* Per-tcf_proto classifier state, published via tp->root. */
struct cls_fl_head {
        struct rhashtable ht;           /* mask dedup table (mask_ht_params) */
        spinlock_t masks_lock; /* Protect masks list */
        struct list_head masks;         /* all masks, walked by fl_classify() */
        struct rcu_work rwork;          /* deferred head free (fl_destroy_sleepable) */
        struct idr handle_idr;          /* filter handle -> cls_fl_filter */
};
96
/* A single flower filter instance. */
struct cls_fl_filter {
        struct fl_flow_mask *mask;      /* shared mask; holds a mask refcount */
        struct rhash_head ht_node;      /* node in mask->ht */
        struct fl_flow_key mkey;        /* key & mask: the hashtable key */
        struct tcf_exts exts;
        struct tcf_result res;
        struct fl_flow_key key;         /* unmasked key as configured */
        struct list_head list;          /* node in mask->filters */
        u32 handle;
        u32 flags;                      /* TCA_CLS_FLAGS_* (skip_sw/skip_hw/in_hw) */
        u32 in_hw_count;                /* number of devices that offloaded it */
        struct rcu_work rwork;          /* deferred free (fl_destroy_filter_work) */
        struct net_device *hw_dev;
        /* Flower classifier is unlocked, which means that its reference counter
         * can be changed concurrently without any kind of external
         * synchronization. Use atomic reference counter to be concurrency-safe.
         */
        refcount_t refcnt;
        bool deleted;                   /* set once under tp->lock by __fl_delete() */
};
117
/* Parameters for cls_fl_head->ht, which deduplicates masks: masks hash and
 * compare by their full fl_flow_key contents.
 */
static const struct rhashtable_params mask_ht_params = {
        .key_offset = offsetof(struct fl_flow_mask, key),
        .key_len = sizeof(struct fl_flow_key),
        .head_offset = offsetof(struct fl_flow_mask, ht_node),
        .automatic_shrinking = true,
};
124
125 static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
126 {
127         return mask->range.end - mask->range.start;
128 }
129
130 static void fl_mask_update_range(struct fl_flow_mask *mask)
131 {
132         const u8 *bytes = (const u8 *) &mask->key;
133         size_t size = sizeof(mask->key);
134         size_t i, first = 0, last;
135
136         for (i = 0; i < size; i++) {
137                 if (bytes[i]) {
138                         first = i;
139                         break;
140                 }
141         }
142         last = first;
143         for (i = size - 1; i != first; i--) {
144                 if (bytes[i]) {
145                         last = i;
146                         break;
147                 }
148         }
149         mask->range.start = rounddown(first, sizeof(long));
150         mask->range.end = roundup(last + 1, sizeof(long));
151 }
152
153 static void *fl_key_get_start(struct fl_flow_key *key,
154                               const struct fl_flow_mask *mask)
155 {
156         return (u8 *) key + mask->range.start;
157 }
158
159 static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
160                               struct fl_flow_mask *mask)
161 {
162         const long *lkey = fl_key_get_start(key, mask);
163         const long *lmask = fl_key_get_start(&mask->key, mask);
164         long *lmkey = fl_key_get_start(mkey, mask);
165         int i;
166
167         for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
168                 *lmkey++ = *lkey++ & *lmask++;
169 }
170
171 static bool fl_mask_fits_tmplt(struct fl_flow_tmplt *tmplt,
172                                struct fl_flow_mask *mask)
173 {
174         const long *lmask = fl_key_get_start(&mask->key, mask);
175         const long *ltmplt;
176         int i;
177
178         if (!tmplt)
179                 return true;
180         ltmplt = fl_key_get_start(&tmplt->mask, mask);
181         for (i = 0; i < fl_mask_range(mask); i += sizeof(long)) {
182                 if (~*ltmplt++ & *lmask++)
183                         return false;
184         }
185         return true;
186 }
187
/* Zero the bytes of @key that @mask's active range covers. */
static void fl_clear_masked_range(struct fl_flow_key *key,
                                  struct fl_flow_mask *mask)
{
        void *start = fl_key_get_start(key, mask);
        unsigned short int len = fl_mask_range(mask);

        memset(start, 0, len);
}
193
194 static bool fl_range_port_dst_cmp(struct cls_fl_filter *filter,
195                                   struct fl_flow_key *key,
196                                   struct fl_flow_key *mkey)
197 {
198         __be16 min_mask, max_mask, min_val, max_val;
199
200         min_mask = htons(filter->mask->key.tp_min.dst);
201         max_mask = htons(filter->mask->key.tp_max.dst);
202         min_val = htons(filter->key.tp_min.dst);
203         max_val = htons(filter->key.tp_max.dst);
204
205         if (min_mask && max_mask) {
206                 if (htons(key->tp.dst) < min_val ||
207                     htons(key->tp.dst) > max_val)
208                         return false;
209
210                 /* skb does not have min and max values */
211                 mkey->tp_min.dst = filter->mkey.tp_min.dst;
212                 mkey->tp_max.dst = filter->mkey.tp_max.dst;
213         }
214         return true;
215 }
216
217 static bool fl_range_port_src_cmp(struct cls_fl_filter *filter,
218                                   struct fl_flow_key *key,
219                                   struct fl_flow_key *mkey)
220 {
221         __be16 min_mask, max_mask, min_val, max_val;
222
223         min_mask = htons(filter->mask->key.tp_min.src);
224         max_mask = htons(filter->mask->key.tp_max.src);
225         min_val = htons(filter->key.tp_min.src);
226         max_val = htons(filter->key.tp_max.src);
227
228         if (min_mask && max_mask) {
229                 if (htons(key->tp.src) < min_val ||
230                     htons(key->tp.src) > max_val)
231                         return false;
232
233                 /* skb does not have min and max values */
234                 mkey->tp_min.src = filter->mkey.tp_min.src;
235                 mkey->tp_max.src = filter->mkey.tp_max.src;
236         }
237         return true;
238 }
239
/* Hash lookup of an already-masked key within a single mask's filter table. */
static struct cls_fl_filter *__fl_lookup(struct fl_flow_mask *mask,
                                         struct fl_flow_key *mkey)
{
        return rhashtable_lookup_fast(&mask->ht, fl_key_get_start(mkey, mask),
                                      mask->filter_ht_params);
}
246
/* Lookup for masks with port-range matching: linearly scan the mask's
 * filters; for each candidate the dst/src comparators range-check the skb
 * ports and write that filter's masked min/max into @mkey (the skb has no
 * min/max of its own), then a normal hash lookup confirms the match.
 * Each iteration overwrites the min/max fields written by the previous one.
 */
static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
                                             struct fl_flow_key *mkey,
                                             struct fl_flow_key *key)
{
        struct cls_fl_filter *filter, *f;

        list_for_each_entry_rcu(filter, &mask->filters, list) {
                if (!fl_range_port_dst_cmp(filter, key, mkey))
                        continue;

                if (!fl_range_port_src_cmp(filter, key, mkey))
                        continue;

                f = __fl_lookup(mask, mkey);
                if (f)
                        return f;
        }
        return NULL;
}
266
267 static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
268                                        struct fl_flow_key *mkey,
269                                        struct fl_flow_key *key)
270 {
271         if ((mask->flags & TCA_FLOWER_MASK_FLAGS_RANGE))
272                 return fl_lookup_range(mask, mkey, key);
273
274         return __fl_lookup(mask, mkey);
275 }
276
/* Software datapath.  For each installed mask: dissect the skb into a
 * flower key, apply the mask, and look the masked key up in that mask's
 * filter table.  The first hit whose filter is not skip_sw has its
 * actions executed.  Runs under RCU-bh (note rcu_dereference_bh()).
 *
 * Returns the tcf_exts_exec() result on a match, -1 (no match) otherwise.
 */
static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
                       struct tcf_result *res)
{
        struct cls_fl_head *head = rcu_dereference_bh(tp->root);
        struct cls_fl_filter *f;
        struct fl_flow_mask *mask;
        struct fl_flow_key skb_key;
        struct fl_flow_key skb_mkey;

        list_for_each_entry_rcu(mask, &head->masks, list) {
                /* only the byte range this mask compares needs zeroing */
                fl_clear_masked_range(&skb_key, mask);

                skb_key.indev_ifindex = skb->skb_iif;
                /* skb_flow_dissect() does not set n_proto in case an unknown
                 * protocol, so do it rather here.
                 */
                skb_key.basic.n_proto = skb->protocol;
                skb_flow_dissect_tunnel_info(skb, &mask->dissector, &skb_key);
                skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);

                fl_set_masked_key(&skb_mkey, &skb_key, mask);

                f = fl_lookup(mask, &skb_mkey, &skb_key);
                if (f && !tc_skip_sw(f->flags)) {
                        *res = f->res;
                        return tcf_exts_exec(skb, &f->exts, res);
                }
        }
        return -1;
}
307
/* ->init() callback: allocate the per-tp head and publish it via tp->root. */
static int fl_init(struct tcf_proto *tp)
{
        struct cls_fl_head *head;

        head = kzalloc(sizeof(*head), GFP_KERNEL);
        if (!head)
                return -ENOBUFS;

        spin_lock_init(&head->masks_lock);
        INIT_LIST_HEAD_RCU(&head->masks);
        rcu_assign_pointer(tp->root, head);
        idr_init(&head->handle_idr);

        /* NOTE(review): if rhashtable_init() fails, @head remains assigned
         * to tp->root and is not freed here — confirm the caller's error
         * path releases it.
         */
        return rhashtable_init(&head->ht, &mask_ht_params);
}
323
/* Final teardown of a mask; all filters must already be gone from it. */
static void fl_mask_free(struct fl_flow_mask *mask)
{
        WARN_ON(!list_empty(&mask->filters));
        rhashtable_destroy(&mask->ht);
        kfree(mask);
}
330
/* Workqueue wrapper around fl_mask_free(), scheduled by fl_mask_put()
 * when the mask must be freed asynchronously (after an RCU grace period).
 */
static void fl_mask_free_work(struct work_struct *work)
{
        struct fl_flow_mask *mask = container_of(to_rcu_work(work),
                                                 struct fl_flow_mask, rwork);

        fl_mask_free(mask);
}
338
/* Drop one reference to @mask.  On the last put, unlink it from the
 * head's dedup hashtable and RCU mask list, then free it — deferred via
 * the RCU workqueue when @async, synchronously otherwise.
 *
 * Returns true iff the mask was released.
 */
static bool fl_mask_put(struct cls_fl_head *head, struct fl_flow_mask *mask,
                        bool async)
{
        if (!refcount_dec_and_test(&mask->refcnt))
                return false;

        rhashtable_remove_fast(&head->ht, &mask->ht_node, mask_ht_params);

        spin_lock(&head->masks_lock);
        list_del_rcu(&mask->list);
        spin_unlock(&head->masks_lock);

        if (async)
                tcf_queue_work(&mask->rwork, fl_mask_free_work);
        else
                fl_mask_free(mask);

        return true;
}
358
/* Final filter teardown: release actions/extensions and free the filter. */
static void __fl_destroy_filter(struct cls_fl_filter *f)
{
        tcf_exts_destroy(&f->exts);
        tcf_exts_put_net(&f->exts);
        kfree(f);
}
365
/* Workqueue wrapper around __fl_destroy_filter(), scheduled by __fl_put()
 * so destruction happens after an RCU grace period, in process context.
 */
static void fl_destroy_filter_work(struct work_struct *work)
{
        struct cls_fl_filter *f = container_of(to_rcu_work(work),
                                        struct cls_fl_filter, rwork);

        __fl_destroy_filter(f);
}
373
/* Tell offloading drivers to remove filter @f from hardware and clear the
 * block's in-hw accounting.  Takes rtnl when the caller does not already
 * hold it; f->flags is updated under tp->lock.
 */
static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f,
                                 bool rtnl_held, struct netlink_ext_ack *extack)
{
        struct tc_cls_flower_offload cls_flower = {};
        struct tcf_block *block = tp->chain->block;

        if (!rtnl_held)
                rtnl_lock();

        tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
        cls_flower.command = TC_CLSFLOWER_DESTROY;
        cls_flower.cookie = (unsigned long) f;

        tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
        spin_lock(&tp->lock);
        tcf_block_offload_dec(block, &f->flags);
        spin_unlock(&tp->lock);

        if (!rtnl_held)
                rtnl_unlock();
}
395
/* Offload filter @f to hardware (TC_CLSFLOWER_REPLACE).
 *
 * Builds a flow_rule from the filter's dissector/mask/key and actions,
 * issues the block callbacks, and updates in-hw accounting.  Takes rtnl
 * when the caller does not already hold it.
 *
 * Error handling:
 *  - flow-action setup failure is fatal only for skip_sw filters;
 *    otherwise the filter simply stays software-only (err reset to 0).
 *  - a negative callback result undoes the offload via
 *    fl_hw_destroy_filter() (rtnl_held=true: rtnl is held at that point).
 *  - a positive result is the number of devices that accepted the rule,
 *    recorded in f->in_hw_count.
 *  - skip_sw filters that ended up in no hardware fail with -EINVAL.
 */
static int fl_hw_replace_filter(struct tcf_proto *tp,
                                struct cls_fl_filter *f, bool rtnl_held,
                                struct netlink_ext_ack *extack)
{
        struct tc_cls_flower_offload cls_flower = {};
        struct tcf_block *block = tp->chain->block;
        bool skip_sw = tc_skip_sw(f->flags);
        int err = 0;

        if (!rtnl_held)
                rtnl_lock();

        cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
        if (!cls_flower.rule) {
                err = -ENOMEM;
                goto errout;
        }

        tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
        cls_flower.command = TC_CLSFLOWER_REPLACE;
        cls_flower.cookie = (unsigned long) f;
        cls_flower.rule->match.dissector = &f->mask->dissector;
        cls_flower.rule->match.mask = &f->mask->key;
        cls_flower.rule->match.key = &f->mkey;
        cls_flower.classid = f->res.classid;

        err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
        if (err) {
                kfree(cls_flower.rule);
                if (skip_sw)
                        NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
                else
                        err = 0;        /* software path can still handle it */
                goto errout;
        }

        err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
        kfree(cls_flower.rule);

        if (err < 0) {
                fl_hw_destroy_filter(tp, f, true, NULL);
                goto errout;
        } else if (err > 0) {
                f->in_hw_count = err;
                err = 0;
                spin_lock(&tp->lock);
                tcf_block_offload_inc(block, &f->flags);
                spin_unlock(&tp->lock);
        }

        if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) {
                err = -EINVAL;
                goto errout;
        }

errout:
        if (!rtnl_held)
                rtnl_unlock();

        return err;
}
457
/* Pull hardware counters for filter @f via TC_CLSFLOWER_STATS and fold
 * them into the filter's action stats.  Takes rtnl if not already held.
 */
static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
                               bool rtnl_held)
{
        struct tc_cls_flower_offload cls_flower = {};
        struct tcf_block *block = tp->chain->block;

        if (!rtnl_held)
                rtnl_lock();

        tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
        cls_flower.command = TC_CLSFLOWER_STATS;
        cls_flower.cookie = (unsigned long) f;
        cls_flower.classid = f->res.classid;

        tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);

        /* drivers filled cls_flower.stats in the callback above */
        tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
                              cls_flower.stats.pkts,
                              cls_flower.stats.lastused);

        if (!rtnl_held)
                rtnl_unlock();
}
481
/* Access tp->root outside of an RCU read-side section. */
static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
{
        /* Flower classifier only changes root pointer during init and destroy.
         * Users must obtain reference to tcf_proto instance before calling its
         * API, so tp->root pointer is protected from concurrent call to
         * fl_destroy() by reference counting.
         */
        return rcu_dereference_raw(tp->root);
}
491
/* Release one filter reference.  The final put destroys the filter, which
 * must already have been unlinked and marked deleted by __fl_delete()
 * (WARN otherwise).  Destruction is queued to a workqueue when the exts
 * still hold a netns reference, otherwise done synchronously.
 */
static void __fl_put(struct cls_fl_filter *f)
{
        if (!refcount_dec_and_test(&f->refcnt))
                return;

        WARN_ON(!f->deleted);

        if (tcf_exts_get_net(&f->exts))
                tcf_queue_work(&f->rwork, fl_destroy_filter_work);
        else
                __fl_destroy_filter(f);
}
504
/* Look up a filter by handle and take a reference to it.  Done under RCU
 * so a concurrently-deleted filter (refcount already zero) is skipped
 * rather than resurrected.  Returns NULL when absent or being deleted.
 */
static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
{
        struct cls_fl_filter *f;

        rcu_read_lock();
        f = idr_find(&head->handle_idr, handle);
        if (f && !refcount_inc_not_zero(&f->refcnt))
                f = NULL;
        rcu_read_unlock();

        return f;
}
517
/* Iterator helper: return (with a reference held) the first live filter
 * whose handle is >= *handle, advancing past filters whose refcount has
 * already dropped to zero (being deleted).  *handle is updated to the
 * position searched; returns NULL at the end of the idr.
 */
static struct cls_fl_filter *fl_get_next_filter(struct tcf_proto *tp,
                                                unsigned long *handle)
{
        struct cls_fl_head *head = fl_head_dereference(tp);
        struct cls_fl_filter *f;

        rcu_read_lock();
        while ((f = idr_get_next_ul(&head->handle_idr, handle))) {
                /* don't return filters that are being deleted */
                if (refcount_inc_not_zero(&f->refcnt))
                        break;
                ++(*handle);
        }
        rcu_read_unlock();

        return f;
}
535
/* Unlink filter @f from the classifier and drop the caller-visible
 * reference.  f->deleted is tested and set under tp->lock so concurrent
 * deleters race safely: the loser gets -ENOENT.  *last is set true when
 * this filter's mask was released as well (its last user went away).
 */
static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
                       bool *last, bool rtnl_held,
                       struct netlink_ext_ack *extack)
{
        struct cls_fl_head *head = fl_head_dereference(tp);
        bool async = tcf_exts_get_net(&f->exts);

        *last = false;

        spin_lock(&tp->lock);
        if (f->deleted) {
                spin_unlock(&tp->lock);
                return -ENOENT;
        }

        f->deleted = true;
        rhashtable_remove_fast(&f->mask->ht, &f->ht_node,
                               f->mask->filter_ht_params);
        idr_remove(&head->handle_idr, f->handle);
        list_del_rcu(&f->list);
        spin_unlock(&tp->lock);

        *last = fl_mask_put(head, f->mask, async);
        if (!tc_skip_hw(f->flags))
                fl_hw_destroy_filter(tp, f, rtnl_held, extack);
        tcf_unbind_filter(tp, &f->res);
        __fl_put(f);            /* drops the reference the idr held */

        return 0;
}
566
/* Workqueue stage of fl_destroy(): frees the head in process context and
 * drops the module reference fl_destroy() took to keep us loaded until now.
 */
static void fl_destroy_sleepable(struct work_struct *work)
{
        struct cls_fl_head *head = container_of(to_rcu_work(work),
                                                struct cls_fl_head,
                                                rwork);

        rhashtable_destroy(&head->ht);
        kfree(head);
        module_put(THIS_MODULE);
}
577
/* ->destroy() callback: delete every filter of every mask, then hand the
 * head to a workqueue for freeing.  A true @last from __fl_delete() means
 * the mask itself was just released, so the inner walk must stop.  The
 * module reference keeps cls_flower loaded until the deferred free runs.
 */
static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
                       struct netlink_ext_ack *extack)
{
        struct cls_fl_head *head = fl_head_dereference(tp);
        struct fl_flow_mask *mask, *next_mask;
        struct cls_fl_filter *f, *next;
        bool last;

        list_for_each_entry_safe(mask, next_mask, &head->masks, list) {
                list_for_each_entry_safe(f, next, &mask->filters, list) {
                        __fl_delete(tp, f, &last, rtnl_held, extack);
                        if (last)
                                break;
                }
        }
        idr_destroy(&head->handle_idr);

        __module_get(THIS_MODULE);
        tcf_queue_work(&head->rwork, fl_destroy_sleepable);
}
598
/* ->put() callback: drop the reference taken by fl_get()/get_next. */
static void fl_put(struct tcf_proto *tp, void *arg)
{
        __fl_put((struct cls_fl_filter *)arg);
}
605
606 static void *fl_get(struct tcf_proto *tp, u32 handle)
607 {
608         struct cls_fl_head *head = fl_head_dereference(tp);
609
610         return __fl_get(head, handle);
611 }
612
/* Netlink attribute policy for TCA_FLOWER_* filter attributes.  Entries
 * without .type are validated by exact length only (.len).
 */
static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
        [TCA_FLOWER_UNSPEC]             = { .type = NLA_UNSPEC },
        [TCA_FLOWER_CLASSID]            = { .type = NLA_U32 },
        [TCA_FLOWER_INDEV]              = { .type = NLA_STRING,
                                            .len = IFNAMSIZ },
        [TCA_FLOWER_KEY_ETH_DST]        = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ETH_DST_MASK]   = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ETH_SRC]        = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ETH_SRC_MASK]   = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ETH_TYPE]       = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_IP_PROTO]       = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_IPV4_SRC]       = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_IPV4_SRC_MASK]  = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_IPV4_DST]       = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_IPV4_DST_MASK]  = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_IPV6_SRC]       = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_IPV6_SRC_MASK]  = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_IPV6_DST]       = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_IPV6_DST_MASK]  = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_TCP_SRC]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_TCP_DST]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_SRC]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_DST]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_VLAN_ID]        = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_VLAN_PRIO]      = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_VLAN_ETH_TYPE]  = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_ENC_KEY_ID]     = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ENC_IPV4_SRC]   = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ENC_IPV4_DST]   = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ENC_IPV6_SRC]   = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_ENC_IPV6_DST]   = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) },
        [TCA_FLOWER_KEY_TCP_SRC_MASK]   = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_TCP_DST_MASK]   = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_SRC_MASK]   = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_UDP_DST_MASK]   = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_SCTP_SRC_MASK]  = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_SCTP_DST_MASK]  = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_SCTP_SRC]       = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_SCTP_DST]       = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT]       = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]  = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_ENC_UDP_DST_PORT]       = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]  = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_FLAGS]          = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_FLAGS_MASK]     = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ICMPV4_TYPE]    = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ICMPV4_CODE]    = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ICMPV6_TYPE]    = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ICMPV6_CODE]    = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ARP_SIP]        = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ARP_SIP_MASK]   = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ARP_TIP]        = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ARP_TIP_MASK]   = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_ARP_OP]         = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ARP_OP_MASK]    = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ARP_SHA]        = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ARP_SHA_MASK]   = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ARP_THA]        = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_ARP_THA_MASK]   = { .len = ETH_ALEN },
        [TCA_FLOWER_KEY_MPLS_TTL]       = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_MPLS_BOS]       = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_MPLS_TC]        = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_MPLS_LABEL]     = { .type = NLA_U32 },
        [TCA_FLOWER_KEY_TCP_FLAGS]      = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_TCP_FLAGS_MASK] = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_IP_TOS]         = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_IP_TOS_MASK]    = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_IP_TTL]         = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_IP_TTL_MASK]    = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_CVLAN_ID]       = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_CVLAN_PRIO]     = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_CVLAN_ETH_TYPE] = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_ENC_IP_TOS]     = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ENC_IP_TTL]      = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ENC_OPTS]       = { .type = NLA_NESTED },
        [TCA_FLOWER_KEY_ENC_OPTS_MASK]  = { .type = NLA_NESTED },
};
700
/* Policy for the nested TCA_FLOWER_KEY_ENC_OPTS(_MASK) attribute. */
static const struct nla_policy
enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
        [TCA_FLOWER_KEY_ENC_OPTS_GENEVE]        = { .type = NLA_NESTED },
};
705
/* Policy for one geneve option inside TCA_FLOWER_KEY_ENC_OPTS_GENEVE. */
static const struct nla_policy
geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
        [TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]      = { .type = NLA_U16 },
        [TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]       = { .type = NLA_U8 },
        [TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]       = { .type = NLA_BINARY,
                                                       .len = 128 },
};
713
714 static void fl_set_key_val(struct nlattr **tb,
715                            void *val, int val_type,
716                            void *mask, int mask_type, int len)
717 {
718         if (!tb[val_type])
719                 return;
720         memcpy(val, nla_data(tb[val_type]), len);
721         if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
722                 memset(mask, 0xff, len);
723         else
724                 memcpy(mask, nla_data(tb[mask_type]), len);
725 }
726
727 static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
728                                  struct fl_flow_key *mask)
729 {
730         fl_set_key_val(tb, &key->tp_min.dst,
731                        TCA_FLOWER_KEY_PORT_DST_MIN, &mask->tp_min.dst,
732                        TCA_FLOWER_UNSPEC, sizeof(key->tp_min.dst));
733         fl_set_key_val(tb, &key->tp_max.dst,
734                        TCA_FLOWER_KEY_PORT_DST_MAX, &mask->tp_max.dst,
735                        TCA_FLOWER_UNSPEC, sizeof(key->tp_max.dst));
736         fl_set_key_val(tb, &key->tp_min.src,
737                        TCA_FLOWER_KEY_PORT_SRC_MIN, &mask->tp_min.src,
738                        TCA_FLOWER_UNSPEC, sizeof(key->tp_min.src));
739         fl_set_key_val(tb, &key->tp_max.src,
740                        TCA_FLOWER_KEY_PORT_SRC_MAX, &mask->tp_max.src,
741                        TCA_FLOWER_UNSPEC, sizeof(key->tp_max.src));
742
743         if ((mask->tp_min.dst && mask->tp_max.dst &&
744              htons(key->tp_max.dst) <= htons(key->tp_min.dst)) ||
745              (mask->tp_min.src && mask->tp_max.src &&
746               htons(key->tp_max.src) <= htons(key->tp_min.src)))
747                 return -EINVAL;
748
749         return 0;
750 }
751
752 static int fl_set_key_mpls(struct nlattr **tb,
753                            struct flow_dissector_key_mpls *key_val,
754                            struct flow_dissector_key_mpls *key_mask)
755 {
756         if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
757                 key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
758                 key_mask->mpls_ttl = MPLS_TTL_MASK;
759         }
760         if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
761                 u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
762
763                 if (bos & ~MPLS_BOS_MASK)
764                         return -EINVAL;
765                 key_val->mpls_bos = bos;
766                 key_mask->mpls_bos = MPLS_BOS_MASK;
767         }
768         if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
769                 u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]);
770
771                 if (tc & ~MPLS_TC_MASK)
772                         return -EINVAL;
773                 key_val->mpls_tc = tc;
774                 key_mask->mpls_tc = MPLS_TC_MASK;
775         }
776         if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
777                 u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]);
778
779                 if (label & ~MPLS_LABEL_MASK)
780                         return -EINVAL;
781                 key_val->mpls_label = label;
782                 key_mask->mpls_label = MPLS_LABEL_MASK;
783         }
784         return 0;
785 }
786
/* Parse VLAN id/priority attributes (attribute ids passed in so the same
 * helper serves both outer vlan and inner cvlan) and record the tag
 * protocol.  The TPID is always matched exactly.
 */
static void fl_set_key_vlan(struct nlattr **tb,
                            __be16 ethertype,
                            int vlan_id_key, int vlan_prio_key,
                            struct flow_dissector_key_vlan *key_val,
                            struct flow_dissector_key_vlan *key_mask)
{
#define VLAN_PRIORITY_MASK      0x7

        if (tb[vlan_id_key]) {
                key_val->vlan_id =
                        nla_get_u16(tb[vlan_id_key]) & VLAN_VID_MASK;
                key_mask->vlan_id = VLAN_VID_MASK;
        }
        if (tb[vlan_prio_key]) {
                key_val->vlan_priority =
                        nla_get_u8(tb[vlan_prio_key]) &
                        VLAN_PRIORITY_MASK;
                key_mask->vlan_priority = VLAN_PRIORITY_MASK;
        }
        key_val->vlan_tpid = ethertype;
        key_mask->vlan_tpid = cpu_to_be16(~0);
}
809
810 static void fl_set_key_flag(u32 flower_key, u32 flower_mask,
811                             u32 *dissector_key, u32 *dissector_mask,
812                             u32 flower_flag_bit, u32 dissector_flag_bit)
813 {
814         if (flower_mask & flower_flag_bit) {
815                 *dissector_mask |= dissector_flag_bit;
816                 if (flower_key & flower_flag_bit)
817                         *dissector_key |= dissector_flag_bit;
818         }
819 }
820
821 static int fl_set_key_flags(struct nlattr **tb,
822                             u32 *flags_key, u32 *flags_mask)
823 {
824         u32 key, mask;
825
826         /* mask is mandatory for flags */
827         if (!tb[TCA_FLOWER_KEY_FLAGS_MASK])
828                 return -EINVAL;
829
830         key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS]));
831         mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
832
833         *flags_key  = 0;
834         *flags_mask = 0;
835
836         fl_set_key_flag(key, mask, flags_key, flags_mask,
837                         TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
838         fl_set_key_flag(key, mask, flags_key, flags_mask,
839                         TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
840                         FLOW_DIS_FIRST_FRAG);
841
842         return 0;
843 }
844
845 static void fl_set_key_ip(struct nlattr **tb, bool encap,
846                           struct flow_dissector_key_ip *key,
847                           struct flow_dissector_key_ip *mask)
848 {
849         int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
850         int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
851         int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
852         int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
853
854         fl_set_key_val(tb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos));
855         fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
856 }
857
/* Parse one GENEVE tunnel option into the options buffer of @key at
 * offset key->enc_opts.len.
 *
 * The function is called twice per option: once for the key (with
 * @option_len == 0) and once for the mask (with @option_len set to the
 * length the key pass returned, so the mask entry is sized identically).
 * When no mask nest was supplied at all, @depth is 0 and the option is
 * filled with all-ones, i.e. an exact match.
 *
 * Returns the number of bytes consumed (geneve_opt header plus data) or
 * a negative errno.
 */
static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
			     int depth, int option_len,
			     struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1];
	struct nlattr *class = NULL, *type = NULL, *data = NULL;
	struct geneve_opt *opt;
	int err, data_len = 0;

	if (option_len > sizeof(struct geneve_opt))
		data_len = option_len - sizeof(struct geneve_opt);

	/* Start from an all-ones template sized by the key pass; reserved
	 * header bits are always zero.
	 */
	opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
	memset(opt, 0xff, option_len);
	opt->length = data_len / 4;
	opt->r1 = 0;
	opt->r2 = 0;
	opt->r3 = 0;

	/* If no mask has been provided we assume an exact match. */
	if (!depth)
		return sizeof(struct geneve_opt) + data_len;

	if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) {
		NL_SET_ERR_MSG(extack, "Non-geneve option type for mask");
		return -EINVAL;
	}

	err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
			       nla, geneve_opt_policy, extack);
	if (err < 0)
		return err;

	/* We are not allowed to omit any of CLASS, TYPE or DATA
	 * fields from the key.
	 */
	if (!option_len &&
	    (!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] ||
	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] ||
	     !tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) {
		NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
		return -EINVAL;
	}

	/* Omitting any of CLASS, TYPE or DATA fields is allowed
	 * for the mask.
	 */
	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) {
		int new_len = key->enc_opts.len;

		data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA];
		/* Option data must be a non-empty multiple of 4 bytes and
		 * the total buffer must not overflow the fixed-size
		 * enc_opts storage.
		 */
		data_len = nla_len(data);
		if (data_len < 4) {
			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
			return -ERANGE;
		}
		if (data_len % 4) {
			NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
			return -ERANGE;
		}

		new_len += sizeof(struct geneve_opt) + data_len;
		BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX);
		if (new_len > FLOW_DIS_TUN_OPTS_MAX) {
			NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
			return -ERANGE;
		}
		opt->length = data_len / 4;
		memcpy(opt->opt_data, nla_data(data), data_len);
	}

	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) {
		class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS];
		opt->opt_class = nla_get_be16(class);
	}

	if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) {
		type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE];
		opt->type = nla_get_u8(type);
	}

	return sizeof(struct geneve_opt) + data_len;
}
941
942 static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
943                           struct fl_flow_key *mask,
944                           struct netlink_ext_ack *extack)
945 {
946         const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
947         int err, option_len, key_depth, msk_depth = 0;
948
949         err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS],
950                                   TCA_FLOWER_KEY_ENC_OPTS_MAX,
951                                   enc_opts_policy, extack);
952         if (err)
953                 return err;
954
955         nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
956
957         if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
958                 err = nla_validate_nested(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK],
959                                           TCA_FLOWER_KEY_ENC_OPTS_MAX,
960                                           enc_opts_policy, extack);
961                 if (err)
962                         return err;
963
964                 nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
965                 msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
966         }
967
968         nla_for_each_attr(nla_opt_key, nla_enc_key,
969                           nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
970                 switch (nla_type(nla_opt_key)) {
971                 case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
972                         option_len = 0;
973                         key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
974                         option_len = fl_set_geneve_opt(nla_opt_key, key,
975                                                        key_depth, option_len,
976                                                        extack);
977                         if (option_len < 0)
978                                 return option_len;
979
980                         key->enc_opts.len += option_len;
981                         /* At the same time we need to parse through the mask
982                          * in order to verify exact and mask attribute lengths.
983                          */
984                         mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
985                         option_len = fl_set_geneve_opt(nla_opt_msk, mask,
986                                                        msk_depth, option_len,
987                                                        extack);
988                         if (option_len < 0)
989                                 return option_len;
990
991                         mask->enc_opts.len += option_len;
992                         if (key->enc_opts.len != mask->enc_opts.len) {
993                                 NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
994                                 return -EINVAL;
995                         }
996
997                         if (msk_depth)
998                                 nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
999                         break;
1000                 default:
1001                         NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
1002                         return -EINVAL;
1003                 }
1004         }
1005
1006         return 0;
1007 }
1008
/* Parse every supported TCA_FLOWER_KEY_* attribute from @tb into @key,
 * recording which fields participate in matching via @mask. Later
 * sections depend on values parsed earlier (n_proto selects the L3
 * branch, ip_proto selects the L4 branch), so ordering matters.
 *
 * Returns 0 on success or a negative errno.
 */
static int fl_set_key(struct net *net, struct nlattr **tb,
		      struct fl_flow_key *key, struct fl_flow_key *mask,
		      struct netlink_ext_ack *extack)
{
	__be16 ethertype;
	int ret = 0;
#ifdef CONFIG_NET_CLS_IND
	/* Ingress device match: tcf_change_indev() resolves the name to an
	 * ifindex (or a negative errno).
	 */
	if (tb[TCA_FLOWER_INDEV]) {
		int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV], extack);
		if (err < 0)
			return err;
		key->indev_ifindex = err;
		mask->indev_ifindex = 0xffffffff;
	}
#endif

	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
		       sizeof(key->eth.dst));
	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
		       sizeof(key->eth.src));

	if (tb[TCA_FLOWER_KEY_ETH_TYPE]) {
		ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]);

		if (eth_type_vlan(ethertype)) {
			/* Outer VLAN tag; a nested VLAN ethertype inside it
			 * means a customer (C-)VLAN follows (QinQ).
			 */
			fl_set_key_vlan(tb, ethertype, TCA_FLOWER_KEY_VLAN_ID,
					TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan,
					&mask->vlan);

			if (tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]) {
				ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_VLAN_ETH_TYPE]);
				if (eth_type_vlan(ethertype)) {
					fl_set_key_vlan(tb, ethertype,
							TCA_FLOWER_KEY_CVLAN_ID,
							TCA_FLOWER_KEY_CVLAN_PRIO,
							&key->cvlan, &mask->cvlan);
					fl_set_key_val(tb, &key->basic.n_proto,
						       TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
						       &mask->basic.n_proto,
						       TCA_FLOWER_UNSPEC,
						       sizeof(key->basic.n_proto));
				} else {
					key->basic.n_proto = ethertype;
					mask->basic.n_proto = cpu_to_be16(~0);
				}
			}
		} else {
			/* Non-VLAN ethertype is an exact n_proto match. */
			key->basic.n_proto = ethertype;
			mask->basic.n_proto = cpu_to_be16(~0);
		}
	}

	/* L3: ip_proto and TOS/TTL only make sense for IPv4/IPv6. */
	if (key->basic.n_proto == htons(ETH_P_IP) ||
	    key->basic.n_proto == htons(ETH_P_IPV6)) {
		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
			       sizeof(key->basic.ip_proto));
		fl_set_key_ip(tb, false, &key->ip, &mask->ip);
	}

	if (tb[TCA_FLOWER_KEY_IPV4_SRC] || tb[TCA_FLOWER_KEY_IPV4_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		mask->control.addr_type = ~0;
		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
			       sizeof(key->ipv4.src));
		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
			       sizeof(key->ipv4.dst));
	} else if (tb[TCA_FLOWER_KEY_IPV6_SRC] || tb[TCA_FLOWER_KEY_IPV6_DST]) {
		key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		mask->control.addr_type = ~0;
		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
			       sizeof(key->ipv6.src));
		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
			       sizeof(key->ipv6.dst));
	}

	/* L4 match: attribute set depends on the ip_proto parsed above. */
	if (key->basic.ip_proto == IPPROTO_TCP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
			       &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
			       &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
			       sizeof(key->tp.dst));
		fl_set_key_val(tb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
			       &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
			       sizeof(key->tcp.flags));
	} else if (key->basic.ip_proto == IPPROTO_UDP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
			       &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
			       &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
			       sizeof(key->tp.dst));
	} else if (key->basic.ip_proto == IPPROTO_SCTP) {
		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
			       &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
			       sizeof(key->tp.src));
		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
			       &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
			       sizeof(key->tp.dst));
	} else if (key->basic.n_proto == htons(ETH_P_IP) &&
		   key->basic.ip_proto == IPPROTO_ICMP) {
		fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
			       &mask->icmp.type,
			       TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
			       sizeof(key->icmp.type));
		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
			       &mask->icmp.code,
			       TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
			       sizeof(key->icmp.code));
	} else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
		   key->basic.ip_proto == IPPROTO_ICMPV6) {
		fl_set_key_val(tb, &key->icmp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
			       &mask->icmp.type,
			       TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
			       sizeof(key->icmp.type));
		fl_set_key_val(tb, &key->icmp.code, TCA_FLOWER_KEY_ICMPV6_CODE,
			       &mask->icmp.code,
			       TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
			       sizeof(key->icmp.code));
	} else if (key->basic.n_proto == htons(ETH_P_MPLS_UC) ||
		   key->basic.n_proto == htons(ETH_P_MPLS_MC)) {
		ret = fl_set_key_mpls(tb, &key->mpls, &mask->mpls);
		if (ret)
			return ret;
	} else if (key->basic.n_proto == htons(ETH_P_ARP) ||
		   key->basic.n_proto == htons(ETH_P_RARP)) {
		fl_set_key_val(tb, &key->arp.sip, TCA_FLOWER_KEY_ARP_SIP,
			       &mask->arp.sip, TCA_FLOWER_KEY_ARP_SIP_MASK,
			       sizeof(key->arp.sip));
		fl_set_key_val(tb, &key->arp.tip, TCA_FLOWER_KEY_ARP_TIP,
			       &mask->arp.tip, TCA_FLOWER_KEY_ARP_TIP_MASK,
			       sizeof(key->arp.tip));
		fl_set_key_val(tb, &key->arp.op, TCA_FLOWER_KEY_ARP_OP,
			       &mask->arp.op, TCA_FLOWER_KEY_ARP_OP_MASK,
			       sizeof(key->arp.op));
		fl_set_key_val(tb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
			       mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
			       sizeof(key->arp.sha));
		fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
			       mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
			       sizeof(key->arp.tha));
	}

	/* Optional port-range matching on top of the L4 ports above. */
	if (key->basic.ip_proto == IPPROTO_TCP ||
	    key->basic.ip_proto == IPPROTO_UDP ||
	    key->basic.ip_proto == IPPROTO_SCTP) {
		ret = fl_set_key_port_range(tb, key, mask);
		if (ret)
			return ret;
	}

	/* Tunnel (encap) key: outer addresses, key id, UDP ports, IP hdr. */
	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
	    tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) {
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		mask->enc_control.addr_type = ~0;
		fl_set_key_val(tb, &key->enc_ipv4.src,
			       TCA_FLOWER_KEY_ENC_IPV4_SRC,
			       &mask->enc_ipv4.src,
			       TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
			       sizeof(key->enc_ipv4.src));
		fl_set_key_val(tb, &key->enc_ipv4.dst,
			       TCA_FLOWER_KEY_ENC_IPV4_DST,
			       &mask->enc_ipv4.dst,
			       TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
			       sizeof(key->enc_ipv4.dst));
	}

	if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] ||
	    tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) {
		key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		mask->enc_control.addr_type = ~0;
		fl_set_key_val(tb, &key->enc_ipv6.src,
			       TCA_FLOWER_KEY_ENC_IPV6_SRC,
			       &mask->enc_ipv6.src,
			       TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
			       sizeof(key->enc_ipv6.src));
		fl_set_key_val(tb, &key->enc_ipv6.dst,
			       TCA_FLOWER_KEY_ENC_IPV6_DST,
			       &mask->enc_ipv6.dst,
			       TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
			       sizeof(key->enc_ipv6.dst));
	}

	fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID,
		       &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC,
		       sizeof(key->enc_key_id.keyid));

	fl_set_key_val(tb, &key->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
		       &mask->enc_tp.src, TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
		       sizeof(key->enc_tp.src));

	fl_set_key_val(tb, &key->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
		       &mask->enc_tp.dst, TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
		       sizeof(key->enc_tp.dst));

	fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);

	if (tb[TCA_FLOWER_KEY_ENC_OPTS]) {
		ret = fl_set_enc_opt(tb, key, mask, extack);
		if (ret)
			return ret;
	}

	if (tb[TCA_FLOWER_KEY_FLAGS])
		ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);

	return ret;
}
1224
1225 static void fl_mask_copy(struct fl_flow_mask *dst,
1226                          struct fl_flow_mask *src)
1227 {
1228         const void *psrc = fl_key_get_start(&src->key, src);
1229         void *pdst = fl_key_get_start(&dst->key, src);
1230
1231         memcpy(pdst, psrc, fl_mask_range(src));
1232         dst->range = src->range;
1233 }
1234
/* Template rhashtable parameters for per-mask filter tables. key_len and
 * the final key_offset are filled in per mask by fl_init_mask_hashtable(),
 * which adjusts them to the byte range the mask actually covers.
 */
static const struct rhashtable_params fl_ht_params = {
	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
	.head_offset = offsetof(struct cls_fl_filter, ht_node),
	.automatic_shrinking = true,
};
1240
1241 static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
1242 {
1243         mask->filter_ht_params = fl_ht_params;
1244         mask->filter_ht_params.key_len = fl_mask_range(mask);
1245         mask->filter_ht_params.key_offset += mask->range.start;
1246
1247         return rhashtable_init(&mask->ht, &mask->filter_ht_params);
1248 }
1249
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
#define FL_KEY_MEMBER_SIZE(member) FIELD_SIZEOF(struct fl_flow_key, member)

/* True iff any byte of @member within @mask is non-zero, i.e. the field
 * participates in matching.
 */
#define FL_KEY_IS_MASKED(mask, member)						\
	memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member),		\
		   0, FL_KEY_MEMBER_SIZE(member))				\

/* Append dissector key @id for @member to @keys and advance @cnt.
 * do/while(0) with no trailing semicolon so the macro behaves as a
 * single statement at the call site (safe inside if/else).
 */
#define FL_KEY_SET(keys, cnt, id, member)					\
	do {									\
		keys[cnt].key_id = id;						\
		keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
		cnt++;								\
	} while(0)

/* Like FL_KEY_SET(), but only when @member is actually masked. */
#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member)			\
	do {									\
		if (FL_KEY_IS_MASKED(mask, member))				\
			FL_KEY_SET(keys, cnt, id, member);			\
	} while(0)
1269
/* Build @dissector for @mask: CONTROL and BASIC keys are always present;
 * every other dissector key is added only if the corresponding field in
 * @mask has at least one bit set, so packet dissection does no more work
 * than this mask's matching requires.
 */
static void fl_init_dissector(struct flow_dissector *dissector,
			      struct fl_flow_key *mask)
{
	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
	size_t cnt = 0;

	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control);
	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
	/* Ports are needed for exact port matches and for port ranges. */
	if (FL_KEY_IS_MASKED(mask, tp) ||
	    FL_KEY_IS_MASKED(mask, tp_min) || FL_KEY_IS_MASKED(mask, tp_max))
		FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_PORTS, tp);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_IP, ip);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_TCP, tcp);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_ICMP, icmp);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_ARP, arp);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_MPLS, mpls);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_VLAN, vlan);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_CVLAN, cvlan);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, enc_ipv4);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, enc_ipv6);
	/* enc_control carries the tunnel address type for either family. */
	if (FL_KEY_IS_MASKED(mask, enc_ipv4) ||
	    FL_KEY_IS_MASKED(mask, enc_ipv6))
		FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_ENC_CONTROL,
			   enc_control);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
	FL_KEY_SET_IF_MASKED(mask, keys, cnt,
			     FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);

	skb_flow_dissector_init(dissector, keys, cnt);
}
1320
/* Promote the temporary @mask (already inserted into head->ht by
 * fl_check_assign_mask()) to a heap-allocated mask with its own filter
 * hash table and dissector, atomically replace the temporary node in
 * head->ht, then link the new mask onto head->masks.
 *
 * Returns the new mask (refcnt 1) or an ERR_PTR; on any error the
 * temporary node is left in place for the caller to remove.
 */
static struct fl_flow_mask *fl_create_new_mask(struct cls_fl_head *head,
					       struct fl_flow_mask *mask)
{
	struct fl_flow_mask *newmask;
	int err;

	newmask = kzalloc(sizeof(*newmask), GFP_KERNEL);
	if (!newmask)
		return ERR_PTR(-ENOMEM);

	fl_mask_copy(newmask, mask);

	/* A mask with both min and max of either port set describes a
	 * port range rather than an exact port match.
	 */
	if ((newmask->key.tp_min.dst && newmask->key.tp_max.dst) ||
	    (newmask->key.tp_min.src && newmask->key.tp_max.src))
		newmask->flags |= TCA_FLOWER_MASK_FLAGS_RANGE;

	err = fl_init_mask_hashtable(newmask);
	if (err)
		goto errout_free;

	fl_init_dissector(&newmask->dissector, &newmask->key);

	INIT_LIST_HEAD_RCU(&newmask->filters);

	refcount_set(&newmask->refcnt, 1);
	/* Swap the caller's stack-allocated placeholder for the permanent
	 * heap-allocated mask under the same hash table key.
	 */
	err = rhashtable_replace_fast(&head->ht, &mask->ht_node,
				      &newmask->ht_node, mask_ht_params);
	if (err)
		goto errout_destroy;

	/* Wait until any potential concurrent users of mask are finished */
	synchronize_rcu();

	spin_lock(&head->masks_lock);
	list_add_tail_rcu(&newmask->list, &head->masks);
	spin_unlock(&head->masks_lock);

	return newmask;

errout_destroy:
	rhashtable_destroy(&newmask->ht);
errout_free:
	kfree(newmask);

	return ERR_PTR(err);
}
1367
/* Assign a mask to @fnew: reuse an existing identical mask from head->ht
 * (taking a reference) or create a new one from the caller's
 * stack-allocated @mask.
 *
 * Returns 0 with fnew->mask set and referenced, -EINVAL when @fold exists
 * but would end up with a different mask, -EAGAIN when the mask was
 * concurrently deleted and the caller should retry, or another negative
 * errno.
 */
static int fl_check_assign_mask(struct cls_fl_head *head,
				struct cls_fl_filter *fnew,
				struct cls_fl_filter *fold,
				struct fl_flow_mask *mask)
{
	struct fl_flow_mask *newmask;
	int ret = 0;

	rcu_read_lock();

	/* Insert mask as temporary node to prevent concurrent creation of mask
	 * with same key. Any concurrent lookups with same key will return
	 * -EAGAIN because mask's refcnt is zero. It is safe to insert
	 * stack-allocated 'mask' to masks hash table because we call
	 * synchronize_rcu() before returning from this function (either in case
	 * of error or after replacing it with heap-allocated mask in
	 * fl_create_new_mask()).
	 */
	fnew->mask = rhashtable_lookup_get_insert_fast(&head->ht,
						       &mask->ht_node,
						       mask_ht_params);
	if (!fnew->mask) {
		/* No equal mask existed; our temporary node was inserted. */
		rcu_read_unlock();

		/* A replace must keep its old mask; a new mask here means
		 * the replacement filter's key layout differs.
		 */
		if (fold) {
			ret = -EINVAL;
			goto errout_cleanup;
		}

		newmask = fl_create_new_mask(head, mask);
		if (IS_ERR(newmask)) {
			ret = PTR_ERR(newmask);
			goto errout_cleanup;
		}

		fnew->mask = newmask;
		return 0;
	} else if (IS_ERR(fnew->mask)) {
		ret = PTR_ERR(fnew->mask);
	} else if (fold && fold->mask != fnew->mask) {
		ret = -EINVAL;
	} else if (!refcount_inc_not_zero(&fnew->mask->refcnt)) {
		/* Mask was deleted concurrently, try again */
		ret = -EAGAIN;
	}
	rcu_read_unlock();
	return ret;

errout_cleanup:
	rhashtable_remove_fast(&head->ht, &mask->ht_node,
			       mask_ht_params);
	/* Wait until any potential concurrent users of mask are finished */
	synchronize_rcu();
	return ret;
}
1423
/* Validate actions/estimator, bind the classid, and parse the match key
 * for filter @f using @mask. @rtnl_held tells us whether the caller
 * already holds rtnl; tcf_bind_filter() must run under rtnl, so we take
 * it locally when needed.
 *
 * Returns 0 on success or a negative errno.
 */
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
			struct cls_fl_filter *f, struct fl_flow_mask *mask,
			unsigned long base, struct nlattr **tb,
			struct nlattr *est, bool ovr,
			struct fl_flow_tmplt *tmplt, bool rtnl_held,
			struct netlink_ext_ack *extack)
{
	int err;

	err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, rtnl_held,
				extack);
	if (err < 0)
		return err;

	if (tb[TCA_FLOWER_CLASSID]) {
		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
		if (!rtnl_held)
			rtnl_lock();
		tcf_bind_filter(tp, &f->res, base);
		if (!rtnl_held)
			rtnl_unlock();
	}

	err = fl_set_key(net, tb, &f->key, &mask->key, extack);
	if (err)
		return err;

	fl_mask_update_range(mask);
	fl_set_masked_key(&f->mkey, &f->key, mask);

	/* A chain template, when present, constrains which fields filters
	 * on the chain may match on.
	 */
	if (!fl_mask_fits_tmplt(tmplt, mask)) {
		NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
		return -EINVAL;
	}

	return 0;
}
1461
/* Create a new flower filter or replace an existing one (@fold != NULL).
 *
 * Allocation, attribute parsing and hardware offload happen outside
 * tp->lock; the filter is then published (idr + mask hashtable + list)
 * under tp->lock with re-checks for concurrent tp/fold deletion, which
 * are reported as -EAGAIN so cls_api retries.
 *
 * On success the new filter is stored in *arg. On any failure the fold
 * reference passed in by the caller is dropped via __fl_put().
 */
static int fl_change(struct net *net, struct sk_buff *in_skb,
		     struct tcf_proto *tp, unsigned long base,
		     u32 handle, struct nlattr **tca,
		     void **arg, bool ovr, bool rtnl_held,
		     struct netlink_ext_ack *extack)
{
	struct cls_fl_head *head = fl_head_dereference(tp);
	struct cls_fl_filter *fold = *arg;
	struct cls_fl_filter *fnew;
	struct fl_flow_mask *mask;
	struct nlattr **tb;
	int err;

	if (!tca[TCA_OPTIONS]) {
		err = -EINVAL;
		goto errout_fold;
	}

	/* Temporary mask; fl_check_assign_mask() either inserts it or finds
	 * an equal one already in head->ht. It is freed before returning.
	 */
	mask = kzalloc(sizeof(struct fl_flow_mask), GFP_KERNEL);
	if (!mask) {
		err = -ENOBUFS;
		goto errout_fold;
	}

	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
	if (!tb) {
		err = -ENOBUFS;
		goto errout_mask_alloc;
	}

	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
			       fl_policy, NULL);
	if (err < 0)
		goto errout_tb;

	/* Replacing an existing filter requires a matching handle. */
	if (fold && handle && fold->handle != handle) {
		err = -EINVAL;
		goto errout_tb;
	}

	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
	if (!fnew) {
		err = -ENOBUFS;
		goto errout_tb;
	}
	refcount_set(&fnew->refcnt, 1);

	err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
	if (err < 0)
		goto errout;

	if (tb[TCA_FLOWER_FLAGS]) {
		fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);

		if (!tc_flags_valid(fnew->flags)) {
			err = -EINVAL;
			goto errout;
		}
	}

	err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
			   tp->chain->tmplt_priv, rtnl_held, extack);
	if (err)
		goto errout;

	/* After this point fnew->mask holds a reference on a shared mask. */
	err = fl_check_assign_mask(head, fnew, fold, mask);
	if (err)
		goto errout;

	if (!tc_skip_hw(fnew->flags)) {
		err = fl_hw_replace_filter(tp, fnew, rtnl_held, extack);
		if (err)
			goto errout_mask;
	}

	if (!tc_in_hw(fnew->flags))
		fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

	spin_lock(&tp->lock);

	/* tp was deleted concurrently. -EAGAIN will cause caller to lookup
	 * proto again or create new one, if necessary.
	 */
	if (tp->deleting) {
		err = -EAGAIN;
		goto errout_hw;
	}

	/* Extra reference for the pointer returned through *arg. */
	refcount_inc(&fnew->refcnt);
	if (fold) {
		/* Fold filter was deleted concurrently. Retry lookup. */
		if (fold->deleted) {
			err = -EAGAIN;
			goto errout_hw;
		}

		fnew->handle = handle;

		err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
					     fnew->mask->filter_ht_params);
		if (err)
			goto errout_hw;

		/* Atomically (under tp->lock) swap fold for fnew in all
		 * lookup structures, then tear fold down outside the lock.
		 */
		rhashtable_remove_fast(&fold->mask->ht,
				       &fold->ht_node,
				       fold->mask->filter_ht_params);
		idr_replace(&head->handle_idr, fnew, fnew->handle);
		list_replace_rcu(&fold->list, &fnew->list);
		fold->deleted = true;

		spin_unlock(&tp->lock);

		fl_mask_put(head, fold->mask, true);
		if (!tc_skip_hw(fold->flags))
			fl_hw_destroy_filter(tp, fold, rtnl_held, NULL);
		tcf_unbind_filter(tp, &fold->res);
		tcf_exts_get_net(&fold->exts);
		/* Caller holds reference to fold, so refcnt is always > 0
		 * after this.
		 */
		refcount_dec(&fold->refcnt);
		__fl_put(fold);
	} else {
		/* Reject duplicates with an identical masked key. */
		if (__fl_lookup(fnew->mask, &fnew->mkey)) {
			err = -EEXIST;
			goto errout_hw;
		}

		if (handle) {
			/* user specifies a handle and it doesn't exist */
			err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
					    handle, GFP_ATOMIC);

			/* Filter with specified handle was concurrently
			 * inserted after initial check in cls_api. This is not
			 * necessarily an error if NLM_F_EXCL is not set in
			 * message flags. Returning EAGAIN will cause cls_api to
			 * try to update concurrently inserted rule.
			 */
			if (err == -ENOSPC)
				err = -EAGAIN;
		} else {
			handle = 1;
			err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
					    INT_MAX, GFP_ATOMIC);
		}
		if (err)
			goto errout_hw;

		fnew->handle = handle;

		err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
					     fnew->mask->filter_ht_params);
		if (err)
			goto errout_idr;

		list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
		spin_unlock(&tp->lock);
	}

	*arg = fnew;

	kfree(tb);
	kfree(mask);
	return 0;

errout_idr:
	idr_remove(&head->handle_idr, fnew->handle);
errout_hw:
	spin_unlock(&tp->lock);
	if (!tc_skip_hw(fnew->flags))
		fl_hw_destroy_filter(tp, fnew, rtnl_held, NULL);
errout_mask:
	fl_mask_put(head, fnew->mask, true);
errout:
	tcf_exts_destroy(&fnew->exts);
	kfree(fnew);
errout_tb:
	kfree(tb);
errout_mask_alloc:
	kfree(mask);
errout_fold:
	if (fold)
		__fl_put(fold);
	return err;
}
1648
1649 static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
1650                      bool rtnl_held, struct netlink_ext_ack *extack)
1651 {
1652         struct cls_fl_head *head = fl_head_dereference(tp);
1653         struct cls_fl_filter *f = arg;
1654         bool last_on_mask;
1655         int err = 0;
1656
1657         err = __fl_delete(tp, f, &last_on_mask, rtnl_held, extack);
1658         *last = list_empty(&head->masks);
1659         __fl_put(f);
1660
1661         return err;
1662 }
1663
1664 static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
1665                     bool rtnl_held)
1666 {
1667         struct cls_fl_filter *f;
1668
1669         arg->count = arg->skip;
1670
1671         while ((f = fl_get_next_filter(tp, &arg->cookie)) != NULL) {
1672                 if (arg->fn(tp, f, arg) < 0) {
1673                         __fl_put(f);
1674                         arg->stop = 1;
1675                         break;
1676                 }
1677                 __fl_put(f);
1678                 arg->cookie++;
1679                 arg->count++;
1680         }
1681 }
1682
/* Replay (add) or remove (destroy) the hardware offload state of every
 * filter through callback @cb, e.g. when a block binding changes.
 *
 * Filters are iterated via fl_get_next_filter(), which returns them with a
 * reference held; every exit path (including errors) releases that
 * reference with __fl_put().
 *
 * Returns 0 on success or a negative errno; errors from individual filters
 * are only fatal when the filter is skip_sw (software fallback impossible).
 */
static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
			void *cb_priv, struct netlink_ext_ack *extack)
{
	struct tc_cls_flower_offload cls_flower = {};
	struct tcf_block *block = tp->chain->block;
	unsigned long handle = 0;
	struct cls_fl_filter *f;
	int err;

	while ((f = fl_get_next_filter(tp, &handle))) {
		/* Filters marked skip_hw were never offloaded. */
		if (tc_skip_hw(f->flags))
			goto next_flow;

		cls_flower.rule =
			flow_rule_alloc(tcf_exts_num_actions(&f->exts));
		if (!cls_flower.rule) {
			__fl_put(f);
			return -ENOMEM;
		}

		tc_cls_common_offload_init(&cls_flower.common, tp, f->flags,
					   extack);
		cls_flower.command = add ?
			TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
		cls_flower.cookie = (unsigned long)f;
		cls_flower.rule->match.dissector = &f->mask->dissector;
		cls_flower.rule->match.mask = &f->mask->key;
		cls_flower.rule->match.key = &f->mkey;

		err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
		if (err) {
			kfree(cls_flower.rule);
			/* Fatal only for skip_sw filters, which cannot fall
			 * back to software classification.
			 */
			if (tc_skip_sw(f->flags)) {
				NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
				__fl_put(f);
				return err;
			}
			goto next_flow;
		}

		cls_flower.classid = f->res.classid;

		err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
		kfree(cls_flower.rule);

		if (err) {
			if (add && tc_skip_sw(f->flags)) {
				__fl_put(f);
				return err;
			}
			goto next_flow;
		}

		/* in_hw_count and flags are shared with fl_change(); update
		 * them under tp->lock.
		 */
		spin_lock(&tp->lock);
		tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags,
					  add);
		spin_unlock(&tp->lock);
next_flow:
		handle++;
		__fl_put(f);
	}

	return 0;
}
1747
1748 static int fl_hw_create_tmplt(struct tcf_chain *chain,
1749                               struct fl_flow_tmplt *tmplt)
1750 {
1751         struct tc_cls_flower_offload cls_flower = {};
1752         struct tcf_block *block = chain->block;
1753
1754         cls_flower.rule = flow_rule_alloc(0);
1755         if (!cls_flower.rule)
1756                 return -ENOMEM;
1757
1758         cls_flower.common.chain_index = chain->index;
1759         cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
1760         cls_flower.cookie = (unsigned long) tmplt;
1761         cls_flower.rule->match.dissector = &tmplt->dissector;
1762         cls_flower.rule->match.mask = &tmplt->mask;
1763         cls_flower.rule->match.key = &tmplt->dummy_key;
1764
1765         /* We don't care if driver (any of them) fails to handle this
1766          * call. It serves just as a hint for it.
1767          */
1768         tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
1769         kfree(cls_flower.rule);
1770
1771         return 0;
1772 }
1773
1774 static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
1775                                 struct fl_flow_tmplt *tmplt)
1776 {
1777         struct tc_cls_flower_offload cls_flower = {};
1778         struct tcf_block *block = chain->block;
1779
1780         cls_flower.common.chain_index = chain->index;
1781         cls_flower.command = TC_CLSFLOWER_TMPLT_DESTROY;
1782         cls_flower.cookie = (unsigned long) tmplt;
1783
1784         tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
1785 }
1786
/* Create a flower chain template from netlink attributes.
 *
 * Parses the key/mask from TCA_OPTIONS, builds the template's dissector and
 * offers the template to offload drivers (best effort).
 *
 * Returns the allocated template (owned by the caller / chain, freed by
 * fl_tmplt_destroy()) or an ERR_PTR on failure.
 */
static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
			     struct nlattr **tca,
			     struct netlink_ext_ack *extack)
{
	struct fl_flow_tmplt *tmplt;
	struct nlattr **tb;
	int err;

	if (!tca[TCA_OPTIONS])
		return ERR_PTR(-EINVAL);

	/* tb is scratch space for parsing only; freed on all paths. */
	tb = kcalloc(TCA_FLOWER_MAX + 1, sizeof(struct nlattr *), GFP_KERNEL);
	if (!tb)
		return ERR_PTR(-ENOBUFS);
	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS],
			       fl_policy, NULL);
	if (err)
		goto errout_tb;

	tmplt = kzalloc(sizeof(*tmplt), GFP_KERNEL);
	if (!tmplt) {
		err = -ENOMEM;
		goto errout_tb;
	}
	tmplt->chain = chain;
	err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
	if (err)
		goto errout_tmplt;

	fl_init_dissector(&tmplt->dissector, &tmplt->mask);

	err = fl_hw_create_tmplt(chain, tmplt);
	if (err)
		goto errout_tmplt;

	kfree(tb);
	return tmplt;

errout_tmplt:
	kfree(tmplt);
errout_tb:
	kfree(tb);
	return ERR_PTR(err);
}
1831
1832 static void fl_tmplt_destroy(void *tmplt_priv)
1833 {
1834         struct fl_flow_tmplt *tmplt = tmplt_priv;
1835
1836         fl_hw_destroy_tmplt(tmplt->chain, tmplt);
1837         kfree(tmplt);
1838 }
1839
1840 static int fl_dump_key_val(struct sk_buff *skb,
1841                            void *val, int val_type,
1842                            void *mask, int mask_type, int len)
1843 {
1844         int err;
1845
1846         if (!memchr_inv(mask, 0, len))
1847                 return 0;
1848         err = nla_put(skb, val_type, len, val);
1849         if (err)
1850                 return err;
1851         if (mask_type != TCA_FLOWER_UNSPEC) {
1852                 err = nla_put(skb, mask_type, len, mask);
1853                 if (err)
1854                         return err;
1855         }
1856         return 0;
1857 }
1858
1859 static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
1860                                   struct fl_flow_key *mask)
1861 {
1862         if (fl_dump_key_val(skb, &key->tp_min.dst, TCA_FLOWER_KEY_PORT_DST_MIN,
1863                             &mask->tp_min.dst, TCA_FLOWER_UNSPEC,
1864                             sizeof(key->tp_min.dst)) ||
1865             fl_dump_key_val(skb, &key->tp_max.dst, TCA_FLOWER_KEY_PORT_DST_MAX,
1866                             &mask->tp_max.dst, TCA_FLOWER_UNSPEC,
1867                             sizeof(key->tp_max.dst)) ||
1868             fl_dump_key_val(skb, &key->tp_min.src, TCA_FLOWER_KEY_PORT_SRC_MIN,
1869                             &mask->tp_min.src, TCA_FLOWER_UNSPEC,
1870                             sizeof(key->tp_min.src)) ||
1871             fl_dump_key_val(skb, &key->tp_max.src, TCA_FLOWER_KEY_PORT_SRC_MAX,
1872                             &mask->tp_max.src, TCA_FLOWER_UNSPEC,
1873                             sizeof(key->tp_max.src)))
1874                 return -1;
1875
1876         return 0;
1877 }
1878
1879 static int fl_dump_key_mpls(struct sk_buff *skb,
1880                             struct flow_dissector_key_mpls *mpls_key,
1881                             struct flow_dissector_key_mpls *mpls_mask)
1882 {
1883         int err;
1884
1885         if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
1886                 return 0;
1887         if (mpls_mask->mpls_ttl) {
1888                 err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
1889                                  mpls_key->mpls_ttl);
1890                 if (err)
1891                         return err;
1892         }
1893         if (mpls_mask->mpls_tc) {
1894                 err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
1895                                  mpls_key->mpls_tc);
1896                 if (err)
1897                         return err;
1898         }
1899         if (mpls_mask->mpls_label) {
1900                 err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
1901                                   mpls_key->mpls_label);
1902                 if (err)
1903                         return err;
1904         }
1905         if (mpls_mask->mpls_bos) {
1906                 err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
1907                                  mpls_key->mpls_bos);
1908                 if (err)
1909                         return err;
1910         }
1911         return 0;
1912 }
1913
1914 static int fl_dump_key_ip(struct sk_buff *skb, bool encap,
1915                           struct flow_dissector_key_ip *key,
1916                           struct flow_dissector_key_ip *mask)
1917 {
1918         int tos_key = encap ? TCA_FLOWER_KEY_ENC_IP_TOS : TCA_FLOWER_KEY_IP_TOS;
1919         int ttl_key = encap ? TCA_FLOWER_KEY_ENC_IP_TTL : TCA_FLOWER_KEY_IP_TTL;
1920         int tos_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TOS_MASK : TCA_FLOWER_KEY_IP_TOS_MASK;
1921         int ttl_mask = encap ? TCA_FLOWER_KEY_ENC_IP_TTL_MASK : TCA_FLOWER_KEY_IP_TTL_MASK;
1922
1923         if (fl_dump_key_val(skb, &key->tos, tos_key, &mask->tos, tos_mask, sizeof(key->tos)) ||
1924             fl_dump_key_val(skb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl)))
1925                 return -1;
1926
1927         return 0;
1928 }
1929
1930 static int fl_dump_key_vlan(struct sk_buff *skb,
1931                             int vlan_id_key, int vlan_prio_key,
1932                             struct flow_dissector_key_vlan *vlan_key,
1933                             struct flow_dissector_key_vlan *vlan_mask)
1934 {
1935         int err;
1936
1937         if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask)))
1938                 return 0;
1939         if (vlan_mask->vlan_id) {
1940                 err = nla_put_u16(skb, vlan_id_key,
1941                                   vlan_key->vlan_id);
1942                 if (err)
1943                         return err;
1944         }
1945         if (vlan_mask->vlan_priority) {
1946                 err = nla_put_u8(skb, vlan_prio_key,
1947                                  vlan_key->vlan_priority);
1948                 if (err)
1949                         return err;
1950         }
1951         return 0;
1952 }
1953
1954 static void fl_get_key_flag(u32 dissector_key, u32 dissector_mask,
1955                             u32 *flower_key, u32 *flower_mask,
1956                             u32 flower_flag_bit, u32 dissector_flag_bit)
1957 {
1958         if (dissector_mask & dissector_flag_bit) {
1959                 *flower_mask |= flower_flag_bit;
1960                 if (dissector_key & dissector_flag_bit)
1961                         *flower_key |= flower_flag_bit;
1962         }
1963 }
1964
1965 static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
1966 {
1967         u32 key, mask;
1968         __be32 _key, _mask;
1969         int err;
1970
1971         if (!memchr_inv(&flags_mask, 0, sizeof(flags_mask)))
1972                 return 0;
1973
1974         key = 0;
1975         mask = 0;
1976
1977         fl_get_key_flag(flags_key, flags_mask, &key, &mask,
1978                         TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT, FLOW_DIS_IS_FRAGMENT);
1979         fl_get_key_flag(flags_key, flags_mask, &key, &mask,
1980                         TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST,
1981                         FLOW_DIS_FIRST_FRAG);
1982
1983         _key = cpu_to_be32(key);
1984         _mask = cpu_to_be32(mask);
1985
1986         err = nla_put(skb, TCA_FLOWER_KEY_FLAGS, 4, &_key);
1987         if (err)
1988                 return err;
1989
1990         return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
1991 }
1992
1993 static int fl_dump_key_geneve_opt(struct sk_buff *skb,
1994                                   struct flow_dissector_key_enc_opts *enc_opts)
1995 {
1996         struct geneve_opt *opt;
1997         struct nlattr *nest;
1998         int opt_off = 0;
1999
2000         nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
2001         if (!nest)
2002                 goto nla_put_failure;
2003
2004         while (enc_opts->len > opt_off) {
2005                 opt = (struct geneve_opt *)&enc_opts->data[opt_off];
2006
2007                 if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,
2008                                  opt->opt_class))
2009                         goto nla_put_failure;
2010                 if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,
2011                                opt->type))
2012                         goto nla_put_failure;
2013                 if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,
2014                             opt->length * 4, opt->opt_data))
2015                         goto nla_put_failure;
2016
2017                 opt_off += sizeof(struct geneve_opt) + opt->length * 4;
2018         }
2019         nla_nest_end(skb, nest);
2020         return 0;
2021
2022 nla_put_failure:
2023         nla_nest_cancel(skb, nest);
2024         return -EMSGSIZE;
2025 }
2026
2027 static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
2028                                struct flow_dissector_key_enc_opts *enc_opts)
2029 {
2030         struct nlattr *nest;
2031         int err;
2032
2033         if (!enc_opts->len)
2034                 return 0;
2035
2036         nest = nla_nest_start(skb, enc_opt_type);
2037         if (!nest)
2038                 goto nla_put_failure;
2039
2040         switch (enc_opts->dst_opt_type) {
2041         case TUNNEL_GENEVE_OPT:
2042                 err = fl_dump_key_geneve_opt(skb, enc_opts);
2043                 if (err)
2044                         goto nla_put_failure;
2045                 break;
2046         default:
2047                 goto nla_put_failure;
2048         }
2049         nla_nest_end(skb, nest);
2050         return 0;
2051
2052 nla_put_failure:
2053         nla_nest_cancel(skb, nest);
2054         return -EMSGSIZE;
2055 }
2056
2057 static int fl_dump_key_enc_opt(struct sk_buff *skb,
2058                                struct flow_dissector_key_enc_opts *key_opts,
2059                                struct flow_dissector_key_enc_opts *msk_opts)
2060 {
2061         int err;
2062
2063         err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts);
2064         if (err)
2065                 return err;
2066
2067         return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts);
2068 }
2069
2070 static int fl_dump_key(struct sk_buff *skb, struct net *net,
2071                        struct fl_flow_key *key, struct fl_flow_key *mask)
2072 {
2073         if (mask->indev_ifindex) {
2074                 struct net_device *dev;
2075
2076                 dev = __dev_get_by_index(net, key->indev_ifindex);
2077                 if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
2078                         goto nla_put_failure;
2079         }
2080
2081         if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
2082                             mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
2083                             sizeof(key->eth.dst)) ||
2084             fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
2085                             mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
2086                             sizeof(key->eth.src)) ||
2087             fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
2088                             &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
2089                             sizeof(key->basic.n_proto)))
2090                 goto nla_put_failure;
2091
2092         if (fl_dump_key_mpls(skb, &key->mpls, &mask->mpls))
2093                 goto nla_put_failure;
2094
2095         if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_VLAN_ID,
2096                              TCA_FLOWER_KEY_VLAN_PRIO, &key->vlan, &mask->vlan))
2097                 goto nla_put_failure;
2098
2099         if (fl_dump_key_vlan(skb, TCA_FLOWER_KEY_CVLAN_ID,
2100                              TCA_FLOWER_KEY_CVLAN_PRIO,
2101                              &key->cvlan, &mask->cvlan) ||
2102             (mask->cvlan.vlan_tpid &&
2103              nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
2104                           key->cvlan.vlan_tpid)))
2105                 goto nla_put_failure;
2106
2107         if (mask->basic.n_proto) {
2108                 if (mask->cvlan.vlan_tpid) {
2109                         if (nla_put_be16(skb, TCA_FLOWER_KEY_CVLAN_ETH_TYPE,
2110                                          key->basic.n_proto))
2111                                 goto nla_put_failure;
2112                 } else if (mask->vlan.vlan_tpid) {
2113                         if (nla_put_be16(skb, TCA_FLOWER_KEY_VLAN_ETH_TYPE,
2114                                          key->basic.n_proto))
2115                                 goto nla_put_failure;
2116                 }
2117         }
2118
2119         if ((key->basic.n_proto == htons(ETH_P_IP) ||
2120              key->basic.n_proto == htons(ETH_P_IPV6)) &&
2121             (fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
2122                             &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
2123                             sizeof(key->basic.ip_proto)) ||
2124             fl_dump_key_ip(skb, false, &key->ip, &mask->ip)))
2125                 goto nla_put_failure;
2126
2127         if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
2128             (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
2129                              &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
2130                              sizeof(key->ipv4.src)) ||
2131              fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
2132                              &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
2133                              sizeof(key->ipv4.dst))))
2134                 goto nla_put_failure;
2135         else if (key->control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
2136                  (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
2137                                   &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
2138                                   sizeof(key->ipv6.src)) ||
2139                   fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
2140                                   &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
2141                                   sizeof(key->ipv6.dst))))
2142                 goto nla_put_failure;
2143
2144         if (key->basic.ip_proto == IPPROTO_TCP &&
2145             (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
2146                              &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK,
2147                              sizeof(key->tp.src)) ||
2148              fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
2149                              &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK,
2150                              sizeof(key->tp.dst)) ||
2151              fl_dump_key_val(skb, &key->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS,
2152                              &mask->tcp.flags, TCA_FLOWER_KEY_TCP_FLAGS_MASK,
2153                              sizeof(key->tcp.flags))))
2154                 goto nla_put_failure;
2155         else if (key->basic.ip_proto == IPPROTO_UDP &&
2156                  (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
2157                                   &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK,
2158                                   sizeof(key->tp.src)) ||
2159                   fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
2160                                   &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK,
2161                                   sizeof(key->tp.dst))))
2162                 goto nla_put_failure;
2163         else if (key->basic.ip_proto == IPPROTO_SCTP &&
2164                  (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_SCTP_SRC,
2165                                   &mask->tp.src, TCA_FLOWER_KEY_SCTP_SRC_MASK,
2166                                   sizeof(key->tp.src)) ||
2167                   fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
2168                                   &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
2169                                   sizeof(key->tp.dst))))
2170                 goto nla_put_failure;
2171         else if (key->basic.n_proto == htons(ETH_P_IP) &&
2172                  key->basic.ip_proto == IPPROTO_ICMP &&
2173                  (fl_dump_key_val(skb, &key->icmp.type,
2174                                   TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->icmp.type,
2175                                   TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
2176                                   sizeof(key->icmp.type)) ||
2177                   fl_dump_key_val(skb, &key->icmp.code,
2178                                   TCA_FLOWER_KEY_ICMPV4_CODE, &mask->icmp.code,
2179                                   TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
2180                                   sizeof(key->icmp.code))))
2181                 goto nla_put_failure;
2182         else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
2183                  key->basic.ip_proto == IPPROTO_ICMPV6 &&
2184                  (fl_dump_key_val(skb, &key->icmp.type,
2185                                   TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->icmp.type,
2186                                   TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
2187                                   sizeof(key->icmp.type)) ||
2188                   fl_dump_key_val(skb, &key->icmp.code,
2189                                   TCA_FLOWER_KEY_ICMPV6_CODE, &mask->icmp.code,
2190                                   TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
2191                                   sizeof(key->icmp.code))))
2192                 goto nla_put_failure;
2193         else if ((key->basic.n_proto == htons(ETH_P_ARP) ||
2194                   key->basic.n_proto == htons(ETH_P_RARP)) &&
2195                  (fl_dump_key_val(skb, &key->arp.sip,
2196                                   TCA_FLOWER_KEY_ARP_SIP, &mask->arp.sip,
2197                                   TCA_FLOWER_KEY_ARP_SIP_MASK,
2198                                   sizeof(key->arp.sip)) ||
2199                   fl_dump_key_val(skb, &key->arp.tip,
2200                                   TCA_FLOWER_KEY_ARP_TIP, &mask->arp.tip,
2201                                   TCA_FLOWER_KEY_ARP_TIP_MASK,
2202                                   sizeof(key->arp.tip)) ||
2203                   fl_dump_key_val(skb, &key->arp.op,
2204                                   TCA_FLOWER_KEY_ARP_OP, &mask->arp.op,
2205                                   TCA_FLOWER_KEY_ARP_OP_MASK,
2206                                   sizeof(key->arp.op)) ||
2207                   fl_dump_key_val(skb, key->arp.sha, TCA_FLOWER_KEY_ARP_SHA,
2208                                   mask->arp.sha, TCA_FLOWER_KEY_ARP_SHA_MASK,
2209                                   sizeof(key->arp.sha)) ||
2210                   fl_dump_key_val(skb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA,
2211                                   mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK,
2212                                   sizeof(key->arp.tha))))
2213                 goto nla_put_failure;
2214
2215         if ((key->basic.ip_proto == IPPROTO_TCP ||
2216              key->basic.ip_proto == IPPROTO_UDP ||
2217              key->basic.ip_proto == IPPROTO_SCTP) &&
2218              fl_dump_key_port_range(skb, key, mask))
2219                 goto nla_put_failure;
2220
2221         if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
2222             (fl_dump_key_val(skb, &key->enc_ipv4.src,
2223                             TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src,
2224                             TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,
2225                             sizeof(key->enc_ipv4.src)) ||
2226              fl_dump_key_val(skb, &key->enc_ipv4.dst,
2227                              TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst,
2228                              TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,
2229                              sizeof(key->enc_ipv4.dst))))
2230                 goto nla_put_failure;
2231         else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS &&
2232                  (fl_dump_key_val(skb, &key->enc_ipv6.src,
2233                             TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src,
2234                             TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,
2235                             sizeof(key->enc_ipv6.src)) ||
2236                  fl_dump_key_val(skb, &key->enc_ipv6.dst,
2237                                  TCA_FLOWER_KEY_ENC_IPV6_DST,
2238                                  &mask->enc_ipv6.dst,
2239                                  TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,
2240                             sizeof(key->enc_ipv6.dst))))
2241                 goto nla_put_failure;
2242
2243         if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID,
2244                             &mask->enc_key_id, TCA_FLOWER_UNSPEC,
2245                             sizeof(key->enc_key_id)) ||
2246             fl_dump_key_val(skb, &key->enc_tp.src,
2247                             TCA_FLOWER_KEY_ENC_UDP_SRC_PORT,
2248                             &mask->enc_tp.src,
2249                             TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,
2250                             sizeof(key->enc_tp.src)) ||
2251             fl_dump_key_val(skb, &key->enc_tp.dst,
2252                             TCA_FLOWER_KEY_ENC_UDP_DST_PORT,
2253                             &mask->enc_tp.dst,
2254                             TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
2255                             sizeof(key->enc_tp.dst)) ||
2256             fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) ||
2257             fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts))
2258                 goto nla_put_failure;
2259
2260         if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))
2261                 goto nla_put_failure;
2262
2263         return 0;
2264
2265 nla_put_failure:
2266         return -EMSGSIZE;
2267 }
2268
/* Dump a single flower filter to a netlink message.
 *
 * @net: network namespace of the dump request
 * @tp: classifier instance the filter belongs to
 * @fh: opaque filter handle (a struct cls_fl_filter *), may be NULL
 * @skb: message buffer being filled
 * @t: tc message header to stamp with the filter handle
 * @rtnl_held: whether the caller holds RTNL (forwarded to the hw-stats path)
 *
 * Returns skb->len on success, -1 on failure (message too small).
 */
static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
		   struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
	struct cls_fl_filter *f = fh;
	struct nlattr *nest;
	struct fl_flow_key *key, *mask;
	bool skip_hw;

	/* No filter handle: nothing to add, report current message length. */
	if (!f)
		return skb->len;

	t->tcm_handle = f->handle;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (!nest)
		goto nla_put_failure;

	/* tp->lock protects the filter fields read below (classid, key,
	 * mask, flags) while they are copied into the message; presumably
	 * needed because these ops run without RTNL
	 * (TCF_PROTO_OPS_DOIT_UNLOCKED) — errors inside the critical
	 * section must exit via nla_put_failure_locked to drop the lock.
	 */
	spin_lock(&tp->lock);

	if (f->res.classid &&
	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
		goto nla_put_failure_locked;

	key = &f->key;
	mask = &f->mask->key;
	/* Sample skip_hw under the lock so the post-unlock decision below
	 * matches the flags that were dumped.
	 */
	skip_hw = tc_skip_hw(f->flags);

	if (fl_dump_key(skb, net, key, mask))
		goto nla_put_failure_locked;

	/* Flags attribute is only emitted when any flag is set. */
	if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
		goto nla_put_failure_locked;

	spin_unlock(&tp->lock);

	/* Hardware stats refresh may sleep/talk to drivers, so it is done
	 * after the spinlock is released.
	 */
	if (!skip_hw)
		fl_hw_update_stats(tp, f, rtnl_held);

	if (nla_put_u32(skb, TCA_FLOWER_IN_HW_COUNT, f->in_hw_count))
		goto nla_put_failure;

	if (tcf_exts_dump(skb, &f->exts))
		goto nla_put_failure;

	nla_nest_end(skb, nest);

	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
		goto nla_put_failure;

	return skb->len;

nla_put_failure_locked:
	spin_unlock(&tp->lock);
nla_put_failure:
	/* Roll back the partially written TCA_OPTIONS nest. */
	nla_nest_cancel(skb, nest);
	return -1;
}
2326
2327 static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv)
2328 {
2329         struct fl_flow_tmplt *tmplt = tmplt_priv;
2330         struct fl_flow_key *key, *mask;
2331         struct nlattr *nest;
2332
2333         nest = nla_nest_start(skb, TCA_OPTIONS);
2334         if (!nest)
2335                 goto nla_put_failure;
2336
2337         key = &tmplt->dummy_key;
2338         mask = &tmplt->mask;
2339
2340         if (fl_dump_key(skb, net, key, mask))
2341                 goto nla_put_failure;
2342
2343         nla_nest_end(skb, nest);
2344
2345         return skb->len;
2346
2347 nla_put_failure:
2348         nla_nest_cancel(skb, nest);
2349         return -EMSGSIZE;
2350 }
2351
2352 static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
2353 {
2354         struct cls_fl_filter *f = fh;
2355
2356         if (f && f->res.classid == classid)
2357                 f->res.class = cl;
2358 }
2359
/* Classifier operations table for the "flower" classifier.
 * TCF_PROTO_OPS_DOIT_UNLOCKED advertises that these ops may be invoked
 * without RTNL held — presumably why fl_dump above takes tp->lock itself;
 * confirm against the tcf_proto_ops flag semantics.
 */
static struct tcf_proto_ops cls_fl_ops __read_mostly = {
	.kind		= "flower",
	.classify	= fl_classify,
	.init		= fl_init,
	.destroy	= fl_destroy,
	.get		= fl_get,
	.put		= fl_put,
	.change		= fl_change,
	.delete		= fl_delete,
	.walk		= fl_walk,
	.reoffload	= fl_reoffload,
	.dump		= fl_dump,
	.bind_class	= fl_bind_class,
	.tmplt_create	= fl_tmplt_create,
	.tmplt_destroy	= fl_tmplt_destroy,
	.tmplt_dump	= fl_tmplt_dump,
	.owner		= THIS_MODULE,
	.flags		= TCF_PROTO_OPS_DOIT_UNLOCKED,
};
2379
/* Module init: register the flower classifier ops with the tc core. */
static int __init cls_fl_init(void)
{
	return register_tcf_proto_ops(&cls_fl_ops);
}
2384
/* Module exit: unregister the flower classifier ops from the tc core. */
static void __exit cls_fl_exit(void)
{
	unregister_tcf_proto_ops(&cls_fl_ops);
}
2389
module_init(cls_fl_init);
module_exit(cls_fl_exit);

/* Module metadata. */
MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
MODULE_DESCRIPTION("Flower classifier");
MODULE_LICENSE("GPL v2");