f40256a3e7f0470f9bdbbf3d27bbb3129595f37b
[linux-block.git] / net / sched / cls_api.c
1 /*
2  * net/sched/cls_api.c  Packet classifier API.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  *
13  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
14  *
15  */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/string.h>
21 #include <linux/errno.h>
22 #include <linux/err.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/kmod.h>
26 #include <linux/slab.h>
27 #include <net/net_namespace.h>
28 #include <net/sock.h>
29 #include <net/netlink.h>
30 #include <net/pkt_sched.h>
31 #include <net/pkt_cls.h>
32
/* The list of all installed classifier types. Entries are linked through
 * tcf_proto_ops.head; access is guarded by cls_mod_lock.
 */
static LIST_HEAD(tcf_proto_base);
35
/* Protects the list of registered TC classifier modules. It is a pure SMP
 * lock: lookups take it for reading, register/unregister for writing.
 */
static DEFINE_RWLOCK(cls_mod_lock);
38
39 /* Find classifier type by string name */
40
41 static const struct tcf_proto_ops *tcf_proto_lookup_ops(const char *kind)
42 {
43         const struct tcf_proto_ops *t, *res = NULL;
44
45         if (kind) {
46                 read_lock(&cls_mod_lock);
47                 list_for_each_entry(t, &tcf_proto_base, head) {
48                         if (strcmp(kind, t->kind) == 0) {
49                                 if (try_module_get(t->owner))
50                                         res = t;
51                                 break;
52                         }
53                 }
54                 read_unlock(&cls_mod_lock);
55         }
56         return res;
57 }
58
59 /* Register(unregister) new classifier type */
60
61 int register_tcf_proto_ops(struct tcf_proto_ops *ops)
62 {
63         struct tcf_proto_ops *t;
64         int rc = -EEXIST;
65
66         write_lock(&cls_mod_lock);
67         list_for_each_entry(t, &tcf_proto_base, head)
68                 if (!strcmp(ops->kind, t->kind))
69                         goto out;
70
71         list_add_tail(&ops->head, &tcf_proto_base);
72         rc = 0;
73 out:
74         write_unlock(&cls_mod_lock);
75         return rc;
76 }
77 EXPORT_SYMBOL(register_tcf_proto_ops);
78
/* Workqueue that tcf_queue_work() queues onto; it is flushed when a
 * classifier type is unregistered.
 */
static struct workqueue_struct *tc_filter_wq;
80
81 int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
82 {
83         struct tcf_proto_ops *t;
84         int rc = -ENOENT;
85
86         /* Wait for outstanding call_rcu()s, if any, from a
87          * tcf_proto_ops's destroy() handler.
88          */
89         rcu_barrier();
90         flush_workqueue(tc_filter_wq);
91
92         write_lock(&cls_mod_lock);
93         list_for_each_entry(t, &tcf_proto_base, head) {
94                 if (t == ops) {
95                         list_del(&t->head);
96                         rc = 0;
97                         break;
98                 }
99         }
100         write_unlock(&cls_mod_lock);
101         return rc;
102 }
103 EXPORT_SYMBOL(unregister_tcf_proto_ops);
104
105 bool tcf_queue_work(struct work_struct *work)
106 {
107         return queue_work(tc_filter_wq, work);
108 }
109 EXPORT_SYMBOL(tcf_queue_work);
110
111 /* Select new prio value from the range, managed by kernel. */
112
113 static inline u32 tcf_auto_prio(struct tcf_proto *tp)
114 {
115         u32 first = TC_H_MAKE(0xC0000000U, 0U);
116
117         if (tp)
118                 first = tp->prio - 1;
119
120         return TC_H_MAJ(first);
121 }
122
123 static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
124                                           u32 prio, u32 parent, struct Qdisc *q,
125                                           struct tcf_chain *chain)
126 {
127         struct tcf_proto *tp;
128         int err;
129
130         tp = kzalloc(sizeof(*tp), GFP_KERNEL);
131         if (!tp)
132                 return ERR_PTR(-ENOBUFS);
133
134         err = -ENOENT;
135         tp->ops = tcf_proto_lookup_ops(kind);
136         if (!tp->ops) {
137 #ifdef CONFIG_MODULES
138                 rtnl_unlock();
139                 request_module("cls_%s", kind);
140                 rtnl_lock();
141                 tp->ops = tcf_proto_lookup_ops(kind);
142                 /* We dropped the RTNL semaphore in order to perform
143                  * the module load. So, even if we succeeded in loading
144                  * the module we have to replay the request. We indicate
145                  * this using -EAGAIN.
146                  */
147                 if (tp->ops) {
148                         module_put(tp->ops->owner);
149                         err = -EAGAIN;
150                 } else {
151                         err = -ENOENT;
152                 }
153                 goto errout;
154 #endif
155         }
156         tp->classify = tp->ops->classify;
157         tp->protocol = protocol;
158         tp->prio = prio;
159         tp->classid = parent;
160         tp->q = q;
161         tp->chain = chain;
162
163         err = tp->ops->init(tp);
164         if (err) {
165                 module_put(tp->ops->owner);
166                 goto errout;
167         }
168         return tp;
169
170 errout:
171         kfree(tp);
172         return ERR_PTR(err);
173 }
174
175 static void tcf_proto_destroy(struct tcf_proto *tp)
176 {
177         tp->ops->destroy(tp);
178         module_put(tp->ops->owner);
179         kfree_rcu(tp, rcu);
180 }
181
182 static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
183                                           u32 chain_index)
184 {
185         struct tcf_chain *chain;
186
187         chain = kzalloc(sizeof(*chain), GFP_KERNEL);
188         if (!chain)
189                 return NULL;
190         list_add_tail(&chain->list, &block->chain_list);
191         chain->block = block;
192         chain->index = chain_index;
193         chain->refcnt = 1;
194         return chain;
195 }
196
197 static void tcf_chain_head_change(struct tcf_chain *chain,
198                                   struct tcf_proto *tp_head)
199 {
200         if (chain->chain_head_change)
201                 chain->chain_head_change(tp_head,
202                                          chain->chain_head_change_priv);
203 }
204
205 static void tcf_chain_flush(struct tcf_chain *chain)
206 {
207         struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
208
209         tcf_chain_head_change(chain, NULL);
210         while (tp) {
211                 RCU_INIT_POINTER(chain->filter_chain, tp->next);
212                 tcf_proto_destroy(tp);
213                 tp = rtnl_dereference(chain->filter_chain);
214                 tcf_chain_put(chain);
215         }
216 }
217
218 static void tcf_chain_destroy(struct tcf_chain *chain)
219 {
220         list_del(&chain->list);
221         kfree(chain);
222 }
223
224 static void tcf_chain_hold(struct tcf_chain *chain)
225 {
226         ++chain->refcnt;
227 }
228
229 struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
230                                 bool create)
231 {
232         struct tcf_chain *chain;
233
234         list_for_each_entry(chain, &block->chain_list, list) {
235                 if (chain->index == chain_index) {
236                         tcf_chain_hold(chain);
237                         return chain;
238                 }
239         }
240
241         return create ? tcf_chain_create(block, chain_index) : NULL;
242 }
243 EXPORT_SYMBOL(tcf_chain_get);
244
245 void tcf_chain_put(struct tcf_chain *chain)
246 {
247         if (--chain->refcnt == 0)
248                 tcf_chain_destroy(chain);
249 }
250 EXPORT_SYMBOL(tcf_chain_put);
251
252 static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q,
253                                   struct tcf_block_ext_info *ei,
254                                   enum tc_block_command command)
255 {
256         struct net_device *dev = q->dev_queue->dev;
257         struct tc_block_offload bo = {};
258
259         if (!dev->netdev_ops->ndo_setup_tc)
260                 return;
261         bo.command = command;
262         bo.binder_type = ei->binder_type;
263         bo.block = block;
264         dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
265 }
266
267 static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
268                                    struct tcf_block_ext_info *ei)
269 {
270         tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
271 }
272
273 static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
274                                      struct tcf_block_ext_info *ei)
275 {
276         tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND);
277 }
278
279 int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
280                       struct tcf_block_ext_info *ei)
281 {
282         struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
283         struct tcf_chain *chain;
284         int err;
285
286         if (!block)
287                 return -ENOMEM;
288         INIT_LIST_HEAD(&block->chain_list);
289         INIT_LIST_HEAD(&block->cb_list);
290
291         /* Create chain 0 by default, it has to be always present. */
292         chain = tcf_chain_create(block, 0);
293         if (!chain) {
294                 err = -ENOMEM;
295                 goto err_chain_create;
296         }
297         WARN_ON(!ei->chain_head_change);
298         chain->chain_head_change = ei->chain_head_change;
299         chain->chain_head_change_priv = ei->chain_head_change_priv;
300         block->net = qdisc_net(q);
301         block->q = q;
302         tcf_block_offload_bind(block, q, ei);
303         *p_block = block;
304         return 0;
305
306 err_chain_create:
307         kfree(block);
308         return err;
309 }
310 EXPORT_SYMBOL(tcf_block_get_ext);
311
312 static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
313 {
314         struct tcf_proto __rcu **p_filter_chain = priv;
315
316         rcu_assign_pointer(*p_filter_chain, tp_head);
317 }
318
319 int tcf_block_get(struct tcf_block **p_block,
320                   struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q)
321 {
322         struct tcf_block_ext_info ei = {
323                 .chain_head_change = tcf_chain_head_change_dflt,
324                 .chain_head_change_priv = p_filter_chain,
325         };
326
327         WARN_ON(!p_filter_chain);
328         return tcf_block_get_ext(p_block, q, &ei);
329 }
330 EXPORT_SYMBOL(tcf_block_get);
331
332 static void tcf_block_put_final(struct work_struct *work)
333 {
334         struct tcf_block *block = container_of(work, struct tcf_block, work);
335         struct tcf_chain *chain, *tmp;
336
337         rtnl_lock();
338
339         /* At this point, all the chains should have refcnt == 1. */
340         list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
341                 tcf_chain_put(chain);
342         rtnl_unlock();
343         kfree(block);
344 }
345
346 /* XXX: Standalone actions are not allowed to jump to any chain, and bound
347  * actions should be all removed after flushing.
348  */
349 void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
350                        struct tcf_block_ext_info *ei)
351 {
352         struct tcf_chain *chain;
353
354         /* Hold a refcnt for all chains, except 0, so that they don't disappear
355          * while we are iterating.
356          */
357         list_for_each_entry(chain, &block->chain_list, list)
358                 if (chain->index)
359                         tcf_chain_hold(chain);
360
361         list_for_each_entry(chain, &block->chain_list, list)
362                 tcf_chain_flush(chain);
363
364         tcf_block_offload_unbind(block, q, ei);
365
366         INIT_WORK(&block->work, tcf_block_put_final);
367         /* Wait for existing RCU callbacks to cool down, make sure their works
368          * have been queued before this. We can not flush pending works here
369          * because we are holding the RTNL lock.
370          */
371         rcu_barrier();
372         tcf_queue_work(&block->work);
373 }
374 EXPORT_SYMBOL(tcf_block_put_ext);
375
376 void tcf_block_put(struct tcf_block *block)
377 {
378         struct tcf_block_ext_info ei = {0, };
379
380         if (!block)
381                 return;
382         tcf_block_put_ext(block, block->q, &ei);
383 }
384
385 EXPORT_SYMBOL(tcf_block_put);
386
/* One callback registered on a block's cb_list. */
struct tcf_block_cb {
	struct list_head list;	/* link in tcf_block.cb_list */
	tc_setup_cb_t *cb;	/* callback; part of the lookup key */
	void *cb_ident;		/* second part of the lookup key, with cb */
	void *cb_priv;		/* passed to cb as its last argument */
	unsigned int refcnt;	/* adjusted by tcf_block_cb_incref/decref */
};
394
395 void *tcf_block_cb_priv(struct tcf_block_cb *block_cb)
396 {
397         return block_cb->cb_priv;
398 }
399 EXPORT_SYMBOL(tcf_block_cb_priv);
400
401 struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
402                                          tc_setup_cb_t *cb, void *cb_ident)
403 {       struct tcf_block_cb *block_cb;
404
405         list_for_each_entry(block_cb, &block->cb_list, list)
406                 if (block_cb->cb == cb && block_cb->cb_ident == cb_ident)
407                         return block_cb;
408         return NULL;
409 }
410 EXPORT_SYMBOL(tcf_block_cb_lookup);
411
412 void tcf_block_cb_incref(struct tcf_block_cb *block_cb)
413 {
414         block_cb->refcnt++;
415 }
416 EXPORT_SYMBOL(tcf_block_cb_incref);
417
418 unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb)
419 {
420         return --block_cb->refcnt;
421 }
422 EXPORT_SYMBOL(tcf_block_cb_decref);
423
424 struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
425                                              tc_setup_cb_t *cb, void *cb_ident,
426                                              void *cb_priv)
427 {
428         struct tcf_block_cb *block_cb;
429
430         block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
431         if (!block_cb)
432                 return NULL;
433         block_cb->cb = cb;
434         block_cb->cb_ident = cb_ident;
435         block_cb->cb_priv = cb_priv;
436         list_add(&block_cb->list, &block->cb_list);
437         return block_cb;
438 }
439 EXPORT_SYMBOL(__tcf_block_cb_register);
440
441 int tcf_block_cb_register(struct tcf_block *block,
442                           tc_setup_cb_t *cb, void *cb_ident,
443                           void *cb_priv)
444 {
445         struct tcf_block_cb *block_cb;
446
447         block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv);
448         return block_cb ? 0 : -ENOMEM;
449 }
450 EXPORT_SYMBOL(tcf_block_cb_register);
451
452 void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb)
453 {
454         list_del(&block_cb->list);
455         kfree(block_cb);
456 }
457 EXPORT_SYMBOL(__tcf_block_cb_unregister);
458
459 void tcf_block_cb_unregister(struct tcf_block *block,
460                              tc_setup_cb_t *cb, void *cb_ident)
461 {
462         struct tcf_block_cb *block_cb;
463
464         block_cb = tcf_block_cb_lookup(block, cb, cb_ident);
465         if (!block_cb)
466                 return;
467         __tcf_block_cb_unregister(block_cb);
468 }
469 EXPORT_SYMBOL(tcf_block_cb_unregister);
470
471 static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
472                              void *type_data, bool err_stop)
473 {
474         struct tcf_block_cb *block_cb;
475         int ok_count = 0;
476         int err;
477
478         list_for_each_entry(block_cb, &block->cb_list, list) {
479                 err = block_cb->cb(type, type_data, block_cb->cb_priv);
480                 if (err) {
481                         if (err_stop)
482                                 return err;
483                 } else {
484                         ok_count++;
485                 }
486         }
487         return ok_count;
488 }
489
/* Main classifier routine: scans classifier chain attached
 * to this qdisc, (optionally) tests for protocol and asks
 * specific classifiers.
 *
 * Walks the filter list starting at @tp. Filters whose protocol is neither
 * the packet's protocol nor ETH_P_ALL are skipped. The first non-negative
 * result from a filter's classify() is returned; a negative result moves on
 * to the next filter. TC_ACT_UNSPEC is returned when the list is exhausted.
 *
 * With CONFIG_NET_CLS_ACT, TC_ACT_RECLASSIFY (unless @compat_mode is set)
 * restarts the walk from the original @tp, and TC_ACT_GOTO_CHAIN restarts it
 * from res->goto_tp. Once max_reclassify_loop restarts have been performed,
 * a further restart request logs a rate-limited notice and returns
 * TC_ACT_SHOT.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_reclassify_loop = 4;
	const struct tcf_proto *orig_tp = tp;
	const struct tcf_proto *first_tp;
	int limit = 0;

reclassify:
#endif
	for (; tp; tp = rcu_dereference_bh(tp->next)) {
		int err;

		/* Skip filters bound to a different protocol. */
		if (tp->protocol != protocol &&
		    tp->protocol != htons(ETH_P_ALL))
			continue;

		err = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
			/* Start over from the head we were given. */
			first_tp = orig_tp;
			goto reset;
		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
			/* Continue on the chain the action jumped to. */
			first_tp = res->goto_tp;
			goto reset;
		}
#endif
		/* A negative result means: try the next filter. */
		if (err >= 0)
			return err;
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	/* Only reached from inside the loop, so tp is non-NULL here and
	 * identifies the filter that asked for the restart.
	 */
	if (unlikely(limit++ >= max_reclassify_loop)) {
		net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->q->ops->id, tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = first_tp;
	/* Re-read the protocol from the skb before classifying again. */
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
543
/* Position in a chain's filter list, as filled in by tcf_chain_tp_find(). */
struct tcf_chain_info {
	/* Link (chain head or a tp's ->next) at which the search stopped. */
	struct tcf_proto __rcu **pprev;
	/* Successor of the tp that was found, or NULL if none was found. */
	struct tcf_proto __rcu *next;
};
548
549 static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
550 {
551         return rtnl_dereference(*chain_info->pprev);
552 }
553
554 static void tcf_chain_tp_insert(struct tcf_chain *chain,
555                                 struct tcf_chain_info *chain_info,
556                                 struct tcf_proto *tp)
557 {
558         if (*chain_info->pprev == chain->filter_chain)
559                 tcf_chain_head_change(chain, tp);
560         RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
561         rcu_assign_pointer(*chain_info->pprev, tp);
562         tcf_chain_hold(chain);
563 }
564
565 static void tcf_chain_tp_remove(struct tcf_chain *chain,
566                                 struct tcf_chain_info *chain_info,
567                                 struct tcf_proto *tp)
568 {
569         struct tcf_proto *next = rtnl_dereference(chain_info->next);
570
571         if (tp == chain->filter_chain)
572                 tcf_chain_head_change(chain, next);
573         RCU_INIT_POINTER(*chain_info->pprev, next);
574         tcf_chain_put(chain);
575 }
576
577 static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
578                                            struct tcf_chain_info *chain_info,
579                                            u32 protocol, u32 prio,
580                                            bool prio_allocate)
581 {
582         struct tcf_proto **pprev;
583         struct tcf_proto *tp;
584
585         /* Check the chain for existence of proto-tcf with this priority */
586         for (pprev = &chain->filter_chain;
587              (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
588                 if (tp->prio >= prio) {
589                         if (tp->prio == prio) {
590                                 if (prio_allocate ||
591                                     (tp->protocol != protocol && protocol))
592                                         return ERR_PTR(-EINVAL);
593                         } else {
594                                 tp = NULL;
595                         }
596                         break;
597                 }
598         }
599         chain_info->pprev = pprev;
600         chain_info->next = tp ? tp->next : NULL;
601         return tp;
602 }
603
604 static int tcf_fill_node(struct net *net, struct sk_buff *skb,
605                          struct tcf_proto *tp, struct Qdisc *q, u32 parent,
606                          void *fh, u32 portid, u32 seq, u16 flags, int event)
607 {
608         struct tcmsg *tcm;
609         struct nlmsghdr  *nlh;
610         unsigned char *b = skb_tail_pointer(skb);
611
612         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
613         if (!nlh)
614                 goto out_nlmsg_trim;
615         tcm = nlmsg_data(nlh);
616         tcm->tcm_family = AF_UNSPEC;
617         tcm->tcm__pad1 = 0;
618         tcm->tcm__pad2 = 0;
619         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
620         tcm->tcm_parent = parent;
621         tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
622         if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
623                 goto nla_put_failure;
624         if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
625                 goto nla_put_failure;
626         if (!fh) {
627                 tcm->tcm_handle = 0;
628         } else {
629                 if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
630                         goto nla_put_failure;
631         }
632         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
633         return skb->len;
634
635 out_nlmsg_trim:
636 nla_put_failure:
637         nlmsg_trim(skb, b);
638         return -1;
639 }
640
641 static int tfilter_notify(struct net *net, struct sk_buff *oskb,
642                           struct nlmsghdr *n, struct tcf_proto *tp,
643                           struct Qdisc *q, u32 parent,
644                           void *fh, int event, bool unicast)
645 {
646         struct sk_buff *skb;
647         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
648
649         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
650         if (!skb)
651                 return -ENOBUFS;
652
653         if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
654                           n->nlmsg_flags, event) <= 0) {
655                 kfree_skb(skb);
656                 return -EINVAL;
657         }
658
659         if (unicast)
660                 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
661
662         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
663                               n->nlmsg_flags & NLM_F_ECHO);
664 }
665
666 static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
667                               struct nlmsghdr *n, struct tcf_proto *tp,
668                               struct Qdisc *q, u32 parent,
669                               void *fh, bool unicast, bool *last)
670 {
671         struct sk_buff *skb;
672         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
673         int err;
674
675         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
676         if (!skb)
677                 return -ENOBUFS;
678
679         if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq,
680                           n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
681                 kfree_skb(skb);
682                 return -EINVAL;
683         }
684
685         err = tp->ops->delete(tp, fh, last);
686         if (err) {
687                 kfree_skb(skb);
688                 return err;
689         }
690
691         if (unicast)
692                 return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
693
694         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
695                               n->nlmsg_flags & NLM_F_ECHO);
696 }
697
698 static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
699                                  struct Qdisc *q, u32 parent,
700                                  struct nlmsghdr *n,
701                                  struct tcf_chain *chain, int event)
702 {
703         struct tcf_proto *tp;
704
705         for (tp = rtnl_dereference(chain->filter_chain);
706              tp; tp = rtnl_dereference(tp->next))
707                 tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false);
708 }
709
710 /* Add/change/delete/get a filter node */
711
712 static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
713                           struct netlink_ext_ack *extack)
714 {
715         struct net *net = sock_net(skb->sk);
716         struct nlattr *tca[TCA_MAX + 1];
717         struct tcmsg *t;
718         u32 protocol;
719         u32 prio;
720         bool prio_allocate;
721         u32 parent;
722         u32 chain_index;
723         struct net_device *dev;
724         struct Qdisc  *q;
725         struct tcf_chain_info chain_info;
726         struct tcf_chain *chain = NULL;
727         struct tcf_block *block;
728         struct tcf_proto *tp;
729         const struct Qdisc_class_ops *cops;
730         unsigned long cl;
731         void *fh;
732         int err;
733         int tp_created;
734
735         if ((n->nlmsg_type != RTM_GETTFILTER) &&
736             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
737                 return -EPERM;
738
739 replay:
740         tp_created = 0;
741
742         err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
743         if (err < 0)
744                 return err;
745
746         t = nlmsg_data(n);
747         protocol = TC_H_MIN(t->tcm_info);
748         prio = TC_H_MAJ(t->tcm_info);
749         prio_allocate = false;
750         parent = t->tcm_parent;
751         cl = 0;
752
753         if (prio == 0) {
754                 switch (n->nlmsg_type) {
755                 case RTM_DELTFILTER:
756                         if (protocol || t->tcm_handle || tca[TCA_KIND])
757                                 return -ENOENT;
758                         break;
759                 case RTM_NEWTFILTER:
760                         /* If no priority is provided by the user,
761                          * we allocate one.
762                          */
763                         if (n->nlmsg_flags & NLM_F_CREATE) {
764                                 prio = TC_H_MAKE(0x80000000U, 0U);
765                                 prio_allocate = true;
766                                 break;
767                         }
768                         /* fall-through */
769                 default:
770                         return -ENOENT;
771                 }
772         }
773
774         /* Find head of filter chain. */
775
776         /* Find link */
777         dev = __dev_get_by_index(net, t->tcm_ifindex);
778         if (dev == NULL)
779                 return -ENODEV;
780
781         /* Find qdisc */
782         if (!parent) {
783                 q = dev->qdisc;
784                 parent = q->handle;
785         } else {
786                 q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
787                 if (q == NULL)
788                         return -EINVAL;
789         }
790
791         /* Is it classful? */
792         cops = q->ops->cl_ops;
793         if (!cops)
794                 return -EINVAL;
795
796         if (!cops->tcf_block)
797                 return -EOPNOTSUPP;
798
799         /* Do we search for filter, attached to class? */
800         if (TC_H_MIN(parent)) {
801                 cl = cops->find(q, parent);
802                 if (cl == 0)
803                         return -ENOENT;
804         }
805
806         /* And the last stroke */
807         block = cops->tcf_block(q, cl);
808         if (!block) {
809                 err = -EINVAL;
810                 goto errout;
811         }
812
813         chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
814         if (chain_index > TC_ACT_EXT_VAL_MASK) {
815                 err = -EINVAL;
816                 goto errout;
817         }
818         chain = tcf_chain_get(block, chain_index,
819                               n->nlmsg_type == RTM_NEWTFILTER);
820         if (!chain) {
821                 err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
822                 goto errout;
823         }
824
825         if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
826                 tfilter_notify_chain(net, skb, q, parent, n,
827                                      chain, RTM_DELTFILTER);
828                 tcf_chain_flush(chain);
829                 err = 0;
830                 goto errout;
831         }
832
833         tp = tcf_chain_tp_find(chain, &chain_info, protocol,
834                                prio, prio_allocate);
835         if (IS_ERR(tp)) {
836                 err = PTR_ERR(tp);
837                 goto errout;
838         }
839
840         if (tp == NULL) {
841                 /* Proto-tcf does not exist, create new one */
842
843                 if (tca[TCA_KIND] == NULL || !protocol) {
844                         err = -EINVAL;
845                         goto errout;
846                 }
847
848                 if (n->nlmsg_type != RTM_NEWTFILTER ||
849                     !(n->nlmsg_flags & NLM_F_CREATE)) {
850                         err = -ENOENT;
851                         goto errout;
852                 }
853
854                 if (prio_allocate)
855                         prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
856
857                 tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
858                                       protocol, prio, parent, q, chain);
859                 if (IS_ERR(tp)) {
860                         err = PTR_ERR(tp);
861                         goto errout;
862                 }
863                 tp_created = 1;
864         } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
865                 err = -EINVAL;
866                 goto errout;
867         }
868
869         fh = tp->ops->get(tp, t->tcm_handle);
870
871         if (!fh) {
872                 if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
873                         tcf_chain_tp_remove(chain, &chain_info, tp);
874                         tfilter_notify(net, skb, n, tp, q, parent, fh,
875                                        RTM_DELTFILTER, false);
876                         tcf_proto_destroy(tp);
877                         err = 0;
878                         goto errout;
879                 }
880
881                 if (n->nlmsg_type != RTM_NEWTFILTER ||
882                     !(n->nlmsg_flags & NLM_F_CREATE)) {
883                         err = -ENOENT;
884                         goto errout;
885                 }
886         } else {
887                 bool last;
888
889                 switch (n->nlmsg_type) {
890                 case RTM_NEWTFILTER:
891                         if (n->nlmsg_flags & NLM_F_EXCL) {
892                                 if (tp_created)
893                                         tcf_proto_destroy(tp);
894                                 err = -EEXIST;
895                                 goto errout;
896                         }
897                         break;
898                 case RTM_DELTFILTER:
899                         err = tfilter_del_notify(net, skb, n, tp, q, parent,
900                                                  fh, false, &last);
901                         if (err)
902                                 goto errout;
903                         if (last) {
904                                 tcf_chain_tp_remove(chain, &chain_info, tp);
905                                 tcf_proto_destroy(tp);
906                         }
907                         goto errout;
908                 case RTM_GETTFILTER:
909                         err = tfilter_notify(net, skb, n, tp, q, parent, fh,
910                                              RTM_NEWTFILTER, true);
911                         goto errout;
912                 default:
913                         err = -EINVAL;
914                         goto errout;
915                 }
916         }
917
918         err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
919                               n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE);
920         if (err == 0) {
921                 if (tp_created)
922                         tcf_chain_tp_insert(chain, &chain_info, tp);
923                 tfilter_notify(net, skb, n, tp, q, parent, fh,
924                                RTM_NEWTFILTER, false);
925         } else {
926                 if (tp_created)
927                         tcf_proto_destroy(tp);
928         }
929
930 errout:
931         if (chain)
932                 tcf_chain_put(chain);
933         if (err == -EAGAIN)
934                 /* Replay the request. */
935                 goto replay;
936         return err;
937 }
938
/*
 * Context handed to a classifier's ->walk() while its filters are dumped.
 *
 * tcf_chain_dump() fills this in and passes &w to the walker;
 * tcf_node_dump() receives that pointer and converts it back to the
 * enclosing structure, so 'w' is kept as the first member.
 */
struct tcf_dump_args {
	struct tcf_walker w;		/* walker state (fn, stop, skip, count) */
	struct sk_buff *skb;		/* message buffer the nodes are written to */
	struct netlink_callback *cb;	/* dump callback of the request */
	struct Qdisc *q;		/* forwarded to tcf_fill_node() */
	u32 parent;			/* forwarded to tcf_fill_node() */
};
946
947 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
948 {
949         struct tcf_dump_args *a = (void *)arg;
950         struct net *net = sock_net(a->skb->sk);
951
952         return tcf_fill_node(net, a->skb, tp, a->q, a->parent,
953                              n, NETLINK_CB(a->cb->skb).portid,
954                              a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
955                              RTM_NEWTFILTER);
956 }
957
958 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
959                            struct sk_buff *skb, struct netlink_callback *cb,
960                            long index_start, long *p_index)
961 {
962         struct net *net = sock_net(skb->sk);
963         struct tcmsg *tcm = nlmsg_data(cb->nlh);
964         struct tcf_dump_args arg;
965         struct tcf_proto *tp;
966
967         for (tp = rtnl_dereference(chain->filter_chain);
968              tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
969                 if (*p_index < index_start)
970                         continue;
971                 if (TC_H_MAJ(tcm->tcm_info) &&
972                     TC_H_MAJ(tcm->tcm_info) != tp->prio)
973                         continue;
974                 if (TC_H_MIN(tcm->tcm_info) &&
975                     TC_H_MIN(tcm->tcm_info) != tp->protocol)
976                         continue;
977                 if (*p_index > index_start)
978                         memset(&cb->args[1], 0,
979                                sizeof(cb->args) - sizeof(cb->args[0]));
980                 if (cb->args[1] == 0) {
981                         if (tcf_fill_node(net, skb, tp, q, parent, 0,
982                                           NETLINK_CB(cb->skb).portid,
983                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
984                                           RTM_NEWTFILTER) <= 0)
985                                 return false;
986
987                         cb->args[1] = 1;
988                 }
989                 if (!tp->ops->walk)
990                         continue;
991                 arg.w.fn = tcf_node_dump;
992                 arg.skb = skb;
993                 arg.cb = cb;
994                 arg.q = q;
995                 arg.parent = parent;
996                 arg.w.stop = 0;
997                 arg.w.skip = cb->args[1] - 1;
998                 arg.w.count = 0;
999                 tp->ops->walk(tp, &arg.w);
1000                 cb->args[1] = arg.w.count + 1;
1001                 if (arg.w.stop)
1002                         return false;
1003         }
1004         return true;
1005 }
1006
1007 /* called with RTNL */
1008 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
1009 {
1010         struct net *net = sock_net(skb->sk);
1011         struct nlattr *tca[TCA_MAX + 1];
1012         struct net_device *dev;
1013         struct Qdisc *q;
1014         struct tcf_block *block;
1015         struct tcf_chain *chain;
1016         struct tcmsg *tcm = nlmsg_data(cb->nlh);
1017         unsigned long cl = 0;
1018         const struct Qdisc_class_ops *cops;
1019         long index_start;
1020         long index;
1021         u32 parent;
1022         int err;
1023
1024         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1025                 return skb->len;
1026
1027         err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
1028         if (err)
1029                 return err;
1030
1031         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1032         if (!dev)
1033                 return skb->len;
1034
1035         parent = tcm->tcm_parent;
1036         if (!parent) {
1037                 q = dev->qdisc;
1038                 parent = q->handle;
1039         } else {
1040                 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
1041         }
1042         if (!q)
1043                 goto out;
1044         cops = q->ops->cl_ops;
1045         if (!cops)
1046                 goto out;
1047         if (!cops->tcf_block)
1048                 goto out;
1049         if (TC_H_MIN(tcm->tcm_parent)) {
1050                 cl = cops->find(q, tcm->tcm_parent);
1051                 if (cl == 0)
1052                         goto out;
1053         }
1054         block = cops->tcf_block(q, cl);
1055         if (!block)
1056                 goto out;
1057
1058         index_start = cb->args[0];
1059         index = 0;
1060
1061         list_for_each_entry(chain, &block->chain_list, list) {
1062                 if (tca[TCA_CHAIN] &&
1063                     nla_get_u32(tca[TCA_CHAIN]) != chain->index)
1064                         continue;
1065                 if (!tcf_chain_dump(chain, q, parent, skb, cb,
1066                                     index_start, &index))
1067                         break;
1068         }
1069
1070         cb->args[0] = index;
1071
1072 out:
1073         return skb->len;
1074 }
1075
/*
 * Release the actions attached to @exts.
 *
 * With CONFIG_NET_CLS_ACT the actions are collected into a list,
 * destroyed with TCA_ACT_UNBIND, and the actions array is freed.  The
 * array pointer and the action count are both reset afterwards so that
 * @exts does not keep a dangling pointer and a repeated call cannot
 * free the array twice.  Must be called with RTNL held.
 *
 * Without CONFIG_NET_CLS_ACT this is a no-op.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	LIST_HEAD(actions);

	ASSERT_RTNL();
	tcf_exts_to_list(exts, &actions);
	tcf_action_destroy(&actions, TCA_ACT_UNBIND);
	kfree(exts->actions);
	exts->actions = NULL;
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
1089
1090 int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
1091                       struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr)
1092 {
1093 #ifdef CONFIG_NET_CLS_ACT
1094         {
1095                 struct tc_action *act;
1096
1097                 if (exts->police && tb[exts->police]) {
1098                         act = tcf_action_init_1(net, tp, tb[exts->police],
1099                                                 rate_tlv, "police", ovr,
1100                                                 TCA_ACT_BIND);
1101                         if (IS_ERR(act))
1102                                 return PTR_ERR(act);
1103
1104                         act->type = exts->type = TCA_OLD_COMPAT;
1105                         exts->actions[0] = act;
1106                         exts->nr_actions = 1;
1107                 } else if (exts->action && tb[exts->action]) {
1108                         LIST_HEAD(actions);
1109                         int err, i = 0;
1110
1111                         err = tcf_action_init(net, tp, tb[exts->action],
1112                                               rate_tlv, NULL, ovr, TCA_ACT_BIND,
1113                                               &actions);
1114                         if (err)
1115                                 return err;
1116                         list_for_each_entry(act, &actions, list)
1117                                 exts->actions[i++] = act;
1118                         exts->nr_actions = i;
1119                 }
1120                 exts->net = net;
1121         }
1122 #else
1123         if ((exts->action && tb[exts->action]) ||
1124             (exts->police && tb[exts->police]))
1125                 return -EOPNOTSUPP;
1126 #endif
1127
1128         return 0;
1129 }
1130 EXPORT_SYMBOL(tcf_exts_validate);
1131
/*
 * Replace the contents of @dst with those of @src and destroy what
 * @dst held before.  Does nothing without CONFIG_NET_CLS_ACT.
 */
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts previous;

	/* Keep a copy of the old contents so they can be torn down. */
	previous = *dst;
	*dst = *src;
	tcf_exts_destroy(&previous);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
1142
#ifdef CONFIG_NET_CLS_ACT
/* Return the first action stored in @exts, or NULL if it holds none. */
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	return exts->nr_actions ? exts->actions[0] : NULL;
}
#endif
1152
/*
 * Write the actions of @exts into @skb as a nested attribute.
 *
 * Nothing is written when @exts has no action attribute type or no
 * actions.  Extensions not marked TCA_OLD_COMPAT are dumped as a list
 * under exts->action; TCA_OLD_COMPAT ones are dumped in the old format
 * under exts->police, if that attribute type is set.
 *
 * Returns 0 on success (always 0 without CONFIG_NET_CLS_ACT) and -1
 * when the nest could not be started or a dump helper failed; in that
 * case the nest is cancelled.
 */
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;

	if (!exts->action || !tcf_exts_has_actions(exts))
		return 0;

	/*
	 * Both the old and the new way of entering tc data are
	 * supported, whatever the iproute2 version, so pick the dump
	 * format from the way the actions were configured.
	 */
	if (exts->type != TCA_OLD_COMPAT) {
		LIST_HEAD(actions);

		nest = nla_nest_start(skb, exts->action);
		if (!nest)
			goto nla_put_failure;

		tcf_exts_to_list(exts, &actions);
		if (tcf_action_dump(skb, &actions, 0, 0) < 0)
			goto nla_put_failure;
		nla_nest_end(skb, nest);
	} else if (exts->police) {
		struct tc_action *first = tcf_exts_first_act(exts);

		nest = nla_nest_start(skb, exts->police);
		if (!nest || !first)
			goto nla_put_failure;
		if (tcf_action_dump_old(skb, first, 0, 0) < 0)
			goto nla_put_failure;
		nla_nest_end(skb, nest);
	}
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
1195
1196
/*
 * Copy the statistics of the first action of @exts into @skb.
 *
 * Returns -1 if tcf_action_copy_stats() fails, 0 otherwise (including
 * when there is no action or CONFIG_NET_CLS_ACT is disabled).
 */
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *first = tcf_exts_first_act(exts);

	if (!first)
		return 0;
	if (tcf_action_copy_stats(skb, first, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
1207
1208 static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
1209                                        enum tc_setup_type type,
1210                                        void *type_data, bool err_stop)
1211 {
1212         int ok_count = 0;
1213 #ifdef CONFIG_NET_CLS_ACT
1214         const struct tc_action *a;
1215         struct net_device *dev;
1216         int i, ret;
1217
1218         if (!tcf_exts_has_actions(exts))
1219                 return 0;
1220
1221         for (i = 0; i < exts->nr_actions; i++) {
1222                 a = exts->actions[i];
1223                 if (!a->ops->get_dev)
1224                         continue;
1225                 dev = a->ops->get_dev(a);
1226                 if (!dev)
1227                         continue;
1228                 ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop);
1229                 if (ret < 0)
1230                         return ret;
1231                 ok_count += ret;
1232         }
1233 #endif
1234         return ok_count;
1235 }
1236
1237 int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
1238                      enum tc_setup_type type, void *type_data, bool err_stop)
1239 {
1240         int ok_count;
1241         int ret;
1242
1243         ret = tcf_block_cb_call(block, type, type_data, err_stop);
1244         if (ret < 0)
1245                 return ret;
1246         ok_count = ret;
1247
1248         if (!exts)
1249                 return ok_count;
1250         ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
1251         if (ret < 0)
1252                 return ret;
1253         ok_count += ret;
1254
1255         return ok_count;
1256 }
1257 EXPORT_SYMBOL(tc_setup_cb_call);
1258
1259 static int __init tc_filter_init(void)
1260 {
1261         tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
1262         if (!tc_filter_wq)
1263                 return -ENOMEM;
1264
1265         rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
1266         rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
1267         rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
1268                       tc_dump_tfilter, 0);
1269
1270         return 0;
1271 }
1272
1273 subsys_initcall(tc_filter_init);