2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/init.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/kmod.h>
34 #include <linux/list.h>
35 #include <linux/bitops.h>
36 #include <linux/hrtimer.h>
38 #include <net/netlink.h>
40 #include <net/pkt_sched.h>
42 #include <asm/processor.h>
43 #include <asm/uaccess.h>
44 #include <asm/system.h>
/* NOTE(review): this view of the file elides many original lines (braces,
 * returns, error paths); comments below annotate the visible statements only.
 */
/* Forward declarations of the rtnetlink notification helpers defined later
 * in this file. */
46 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
47 struct Qdisc *old, struct Qdisc *new);
48 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
49 struct Qdisc *q, unsigned long cl, int event);
56 This file consists of two interrelated parts:
58 1. queueing disciplines manager frontend.
59 2. traffic classes manager frontend.
61 Generally, queueing discipline ("qdisc") is a black box,
62 which is able to enqueue packets and to dequeue them (when
63 device is ready to send something) in order and at times
64 determined by algorithm hidden in it.
66 qdiscs are divided into two categories:
67 - "queues", which have no internal structure visible from outside.
68 - "schedulers", which split all the packets to "traffic classes",
69 using "packet classifiers" (look at cls_api.c)
71 In turn, classes may have child qdiscs (as a rule, queues)
72 attached to them etc. etc. etc.
74 The goal of the routines in this file is to translate
75 the information supplied by the user in the form of handles
76 into a form more intelligible to the kernel, to perform some sanity
77 checks and the parts of the work that are common to all qdiscs,
78 and to provide rtnetlink notifications.
80 All real intelligent work is done inside qdisc modules.
84 Every discipline has two major routines: enqueue and dequeue.
88 dequeue usually returns a skb to send. It is allowed to return NULL,
89 but it does not mean that queue is empty, it just means that
90 discipline does not want to send anything this time.
91 Queue is really empty if q->q.qlen == 0.
92 For complicated disciplines with multiple queues q->q is not
93 real packet queue, but however q->q.qlen must be valid.
97 enqueue returns 0, if packet was enqueued successfully.
98 If packet (this one or another one) was dropped, it returns
100 NET_XMIT_DROP - this packet dropped
101 Expected action: do not backoff, but wait until queue will clear.
102 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
103 Expected action: backoff or ignore
104 NET_XMIT_POLICED - dropped by police.
105 Expected action: backoff or error to real-time apps.
111 requeues a previously dequeued packet. It is used for non-standard or
112 just buggy devices, which can defer output even if dev->tbusy=0.
116 returns qdisc to initial state: purge all buffers, clear all
117 timers, counters (except for statistics) etc.
121 initializes newly created qdisc.
125 destroys resources allocated by init and during lifetime of qdisc.
129 changes qdisc parameters.
132 /* Protects the list of registered TC modules (qdisc_base below). It is a pure SMP lock. */
133 static DEFINE_RWLOCK(qdisc_mod_lock);
136 /************************************************
137 * Queueing disciplines manipulation. *
138 ************************************************/
141 /* The list of all installed queueing disciplines.
 * Singly linked via Qdisc_ops::next, guarded by qdisc_mod_lock. */
143 static struct Qdisc_ops *qdisc_base;
145 /* Register/unregister queueing discipline */
147 int register_qdisc(struct Qdisc_ops *qops)
149 struct Qdisc_ops *q, **qp;
/* Walk the registered list under the writer lock; a duplicate id is
 * rejected (error path elided in this excerpt). */
152 write_lock(&qdisc_mod_lock);
153 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
154 if (!strcmp(qops->id, q->id))
/* Fill in any ops the module left NULL with the no-op defaults. */
157 if (qops->enqueue == NULL)
158 qops->enqueue = noop_qdisc_ops.enqueue;
159 if (qops->requeue == NULL)
160 qops->requeue = noop_qdisc_ops.requeue;
161 if (qops->dequeue == NULL)
162 qops->dequeue = noop_qdisc_ops.dequeue;
168 write_unlock(&qdisc_mod_lock);
/* Remove a queueing discipline from qdisc_base. The scan locates the
 * matching entry; the unlink itself is elided from this excerpt. */
172 int unregister_qdisc(struct Qdisc_ops *qops)
174 struct Qdisc_ops *q, **qp;
177 write_lock(&qdisc_mod_lock);
178 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
186 write_unlock(&qdisc_mod_lock);
190 /* We know handle. Find qdisc among all qdisc's attached to device
191 (root qdisc, all its children, children of children etc.)
/* Linear scan of dev->qdisc_list by handle. Lockless variant: the caller
 * is expected to hold qdisc_tree_lock (see qdisc_lookup() below). */
194 static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
198 list_for_each_entry(q, &dev->qdisc_list, list) {
199 if (q->handle == handle)
/* Locked wrapper around __qdisc_lookup(): takes qdisc_tree_lock for
 * reading around the list scan. */
205 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
209 read_lock(&qdisc_tree_lock);
210 q = __qdisc_lookup(dev, handle);
211 read_unlock(&qdisc_tree_lock);
/* Resolve @classid inside parent qdisc @p to the leaf qdisc attached to
 * that class, via the parent's class ops (get + leaf). NULL checks on
 * cops/cl are elided from this excerpt. */
215 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
219 struct Qdisc_class_ops *cops = p->ops->cl_ops;
223 cl = cops->get(p, classid);
227 leaf = cops->leaf(p, cl);
232 /* Find queueing discipline by name */
/* On a name match, a module reference is taken with try_module_get() —
 * the caller owns that reference and must module_put() it. */
234 static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
236 struct Qdisc_ops *q = NULL;
239 read_lock(&qdisc_mod_lock);
240 for (q = qdisc_base; q; q = q->next) {
241 if (rtattr_strcmp(kind, q->id) == 0) {
242 if (!try_module_get(q->owner))
247 read_unlock(&qdisc_mod_lock);
/* Global cache of rate tables, shared between qdiscs with identical
 * tc_ratespec parameters. */
252 static struct qdisc_rate_table *qdisc_rtab_list;
/* Look up (or create) the rate table for @r. An existing entry whose
 * ratespec matches byte-for-byte is reused (refcount bump elided here);
 * otherwise the netlink-supplied table must be exactly 1024 bytes and
 * the ratespec non-degenerate before a new entry is allocated and
 * pushed on the front of the list. */
254 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
256 struct qdisc_rate_table *rtab;
258 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
259 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
265 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
268 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
272 memcpy(rtab->data, RTA_DATA(tab), 1024);
273 rtab->next = qdisc_rtab_list;
274 qdisc_rtab_list = rtab;
/* Drop one reference on a cached rate table; when the count hits zero,
 * locate it in qdisc_rtab_list (the unlink/kfree is elided from this
 * excerpt). free(NULL) semantics: a NULL @tab is a no-op. */
279 void qdisc_put_rtab(struct qdisc_rate_table *tab)
281 struct qdisc_rate_table *rtab, **rtabp;
283 if (!tab || --tab->refcnt)
286 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
/* hrtimer callback: un-throttle the watchdog's qdisc and, if the device
 * queue lock can be taken without spinning (we are in timer context),
 * kick the device so it dequeues again. One-shot: never rearms itself. */
295 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
297 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
299 struct net_device *dev = wd->qdisc->dev;
301 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
303 if (spin_trylock(&dev->queue_lock)) {
305 spin_unlock(&dev->queue_lock);
309 return HRTIMER_NORESTART;
/* Prepare a watchdog for @qdisc: absolute-time monotonic hrtimer whose
 * expiry handler is qdisc_watchdog() above. */
312 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
314 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
315 wd->timer.function = qdisc_watchdog;
318 EXPORT_SYMBOL(qdisc_watchdog_init);
/* Mark the qdisc throttled and arm the hrtimer for @expires, converting
 * the psched timestamp to a ktime in nanoseconds. */
320 void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
324 wd->qdisc->flags |= TCQ_F_THROTTLED;
325 time = ktime_set(0, 0);
326 time = ktime_add_ns(time, PSCHED_US2NS(expires));
327 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
329 EXPORT_SYMBOL(qdisc_watchdog_schedule);
/* Cancel a pending watchdog (waits for a running handler) and clear the
 * throttled flag it would otherwise have cleared on expiry. */
331 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
333 hrtimer_cancel(&wd->timer);
334 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
336 EXPORT_SYMBOL(qdisc_watchdog_cancel);
338 /* Allocate an unique handle from space managed by kernel */
/* Bumps a static counter through the 8000:0000+ major range, wrapping
 * back before it would collide with TC_H_ROOT, until an unused handle
 * is found. Returns 0 when the retry budget (i) is exhausted.
 * NOTE(review): the static counter implies this relies on external
 * serialization (presumably RTNL) — confirm against callers. */
340 static u32 qdisc_alloc_handle(struct net_device *dev)
343 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
346 autohandle += TC_H_MAKE(0x10000U, 0);
347 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
348 autohandle = TC_H_MAKE(0x80000000U, 0);
349 } while (qdisc_lookup(dev, autohandle) && --i > 0);
351 return i>0 ? autohandle : 0;
354 /* Attach toplevel qdisc to device dev.
 * Returns the old qdisc that was replaced. The device is deactivated
 * first if it is up (deactivate/activate calls elided in this excerpt),
 * and the whole swap happens under the qdisc tree lock. Ingress qdiscs
 * (TCQ_F_INGRESS) are grafted onto dev->qdisc_ingress; everything else
 * onto dev->qdisc_sleeping. */
356 static struct Qdisc *
357 dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
359 struct Qdisc *oqdisc;
361 if (dev->flags & IFF_UP)
364 qdisc_lock_tree(dev);
365 if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
366 oqdisc = dev->qdisc_ingress;
367 /* Prune old scheduler if we hold the last reference */
368 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
371 dev->qdisc_ingress = NULL;
373 dev->qdisc_ingress = qdisc;
378 oqdisc = dev->qdisc_sleeping;
380 /* Prune old scheduler */
381 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
384 /* ... and graft new one; dev->qdisc stays noop until the
 * device is (re)activated */
387 dev->qdisc_sleeping = qdisc;
388 dev->qdisc = &noop_qdisc;
391 qdisc_unlock_tree(dev);
393 if (dev->flags & IFF_UP)
/* Propagate a queue-length decrease of @n packets up the qdisc tree:
 * walk from @sch to the root via sch->parent, and at each level tell
 * the parent class (if it implements qlen_notify) that its child's
 * qlen changed. The per-level qlen adjustment itself is elided from
 * this excerpt. */
399 void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
401 struct Qdisc_class_ops *cops;
407 while ((parentid = sch->parent)) {
408 sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
409 cops = sch->ops->cl_ops;
410 if (cops->qlen_notify) {
411 cl = cops->get(sch, parentid);
412 cops->qlen_notify(sch, cl);
418 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
420 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to the device root when parent is NULL.
423 Old qdisc is not destroyed but returned in *old.
 */
426 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
428 struct Qdisc *new, struct Qdisc **old)
431 struct Qdisc *q = *old;
/* Root graft: delegate to dev_graft_qdisc(); the ingress-flagged branch
 * regrafts the existing q rather than new (full logic elided here). */
434 if (parent == NULL) {
435 if (q && q->flags&TCQ_F_INGRESS) {
436 *old = dev_graft_qdisc(dev, q);
438 *old = dev_graft_qdisc(dev, new);
/* Class graft: resolve the class and hand off to the parent's
 * ->graft() op, fixing up new->parent on success. */
441 struct Qdisc_class_ops *cops = parent->ops->cl_ops;
446 unsigned long cl = cops->get(parent, classid);
448 err = cops->graft(parent, cl, new, old);
450 new->parent = classid;
451 cops->put(parent, cl);
459 Allocate and initialize new qdisc.
461 Parameters are passed via opt.
 * On failure, *errp carries the error (goto labels elided in this excerpt).
464 static struct Qdisc *
465 qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
468 struct rtattr *kind = tca[TCA_KIND-1];
470 struct Qdisc_ops *ops;
472 ops = qdisc_lookup_ops(kind);
/* Unknown kind: try autoloading the "sch_<name>" module. */
474 if (ops == NULL && kind != NULL) {
476 if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
477 /* We dropped the RTNL semaphore in order to
478 * perform the module load. So, even if we
479 * succeeded in loading the module we have to
480 * tell the caller to replay the request. We
481 * indicate this using -EAGAIN.
482 * We replay the request because the device may
483 * go away in the mean time.
486 request_module("sch_%s", name);
488 ops = qdisc_lookup_ops(kind);
490 /* We will try again qdisc_lookup_ops,
491 * so don't keep a reference.
493 module_put(ops->owner);
505 sch = qdisc_alloc(dev, ops);
/* Handle selection: fixed pseudo-handle for ingress, kernel-allocated
 * when the caller passed 0. */
511 if (handle == TC_H_INGRESS) {
512 sch->flags |= TCQ_F_INGRESS;
513 handle = TC_H_MAKE(TC_H_INGRESS, 0);
514 } else if (handle == 0) {
515 handle = qdisc_alloc_handle(dev);
521 sch->handle = handle;
/* Initialize via ops->init (if any), then optionally attach a rate
 * estimator from TCA_RATE. */
523 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
524 #ifdef CONFIG_NET_ESTIMATOR
525 if (tca[TCA_RATE-1]) {
526 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
531 * Any broken qdiscs that would require
532 * a ops->reset() here? The qdisc was never
533 * in action so it shouldn't be necessary.
/* Success: publish the new qdisc on the device list. */
541 qdisc_lock_tree(dev);
542 list_add_tail(&sch->list, &dev->qdisc_list);
543 qdisc_unlock_tree(dev);
/* Error path: undo qdisc_alloc's padded allocation and drop the module
 * reference taken by qdisc_lookup_ops. */
549 kfree((char *) sch - sch->padded);
551 module_put(ops->owner);
/* Apply a parameter change to an existing qdisc: forward TCA_OPTIONS to
 * ops->change (qdiscs without a change op reject the request) and, when
 * the estimator is compiled in, replace the rate estimator from TCA_RATE. */
557 static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
559 if (tca[TCA_OPTIONS-1]) {
562 if (sch->ops->change == NULL)
564 err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
568 #ifdef CONFIG_NET_ESTIMATOR
570 gen_replace_estimator(&sch->bstats, &sch->rate_est,
571 sch->stats_lock, tca[TCA_RATE-1]);
/* Walker state for the loop-detection walk below: embeds the generic
 * qdisc_walker (further fields elided in this excerpt). */
576 struct check_loop_arg
578 struct qdisc_walker w;
583 static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
/* Detect whether grafting @q under @p would create a cycle: walk q's
 * classes with check_loop_fn; classless qdiscs cannot loop. Returns
 * -ELOOP if the walk was stopped (loop or depth limit), else 0. */
585 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
587 struct check_loop_arg arg;
589 if (q->ops->cl_ops == NULL)
592 arg.w.stop = arg.w.skip = arg.w.count = 0;
593 arg.w.fn = check_loop_fn;
596 q->ops->cl_ops->walk(q, &arg.w);
597 return arg.w.stop ? -ELOOP : 0;
/* Per-class callback for check_loop(): recurse into each class's leaf
 * qdisc, stopping when the target parent is reached (a loop) or the
 * recursion depth exceeds 7. */
601 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
604 struct Qdisc_class_ops *cops = q->ops->cl_ops;
605 struct check_loop_arg *arg = (struct check_loop_arg *)w;
607 leaf = cops->leaf(q, cl);
609 if (leaf == arg->p || arg->depth > 7)
611 return check_loop(leaf, arg->p, arg->depth + 1);
/* RTM_GETQDISC / RTM_DELQDISC handler. Locates the qdisc named by the
 * tcmsg (by parent classid or, for TC_H_ROOT / ingress, the device
 * defaults), cross-checks tcm_handle and TCA_KIND, then either deletes
 * it (ungraft + notify + destroy under queue_lock) or just replies with
 * a RTM_NEWQDISC dump. */
620 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
622 struct tcmsg *tcm = NLMSG_DATA(n);
623 struct rtattr **tca = arg;
624 struct net_device *dev;
625 u32 clid = tcm->tcm_parent;
626 struct Qdisc *q = NULL;
627 struct Qdisc *p = NULL;
630 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
/* Resolve target: child of a parent class, the ingress qdisc, or the
 * device's sleeping (root) qdisc. */
634 if (clid != TC_H_ROOT) {
635 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
636 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
638 q = qdisc_leaf(p, clid);
639 } else { /* ingress */
640 q = dev->qdisc_ingress;
643 q = dev->qdisc_sleeping;
648 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
651 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
655 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
658 if (n->nlmsg_type == RTM_DELQDISC) {
/* Delete: ungraft (new == NULL), notify listeners, then destroy under
 * the device queue lock. */
663 if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
666 qdisc_notify(skb, n, clid, q, NULL);
667 spin_lock_bh(&dev->queue_lock);
669 spin_unlock_bh(&dev->queue_lock);
/* Get: just echo the qdisc back to the requester. */
672 qdisc_notify(skb, n, clid, NULL, q);
/* RTM_NEWQDISC handler: create, replace, or change a qdisc according to
 * the NLM_F_* flags. Resolves the target like tc_get_qdisc(), then
 * decides between changing the existing qdisc in place and creating a
 * new one to graft (see the "magic test" comment below). */
681 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
685 struct net_device *dev;
691 /* Reinit, just in case something touches this. */
694 clid = tcm->tcm_parent;
697 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
/* Resolve the qdisc currently at the requested position. */
701 if (clid != TC_H_ROOT) {
702 if (clid != TC_H_INGRESS) {
703 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
705 q = qdisc_leaf(p, clid);
706 } else { /*ingress */
707 q = dev->qdisc_ingress;
710 q = dev->qdisc_sleeping;
713 /* It may be default qdisc, ignore it */
714 if (q && q->handle == 0)
717 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
718 if (tcm->tcm_handle) {
719 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
721 if (TC_H_MIN(tcm->tcm_handle))
723 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
725 if (n->nlmsg_flags&NLM_F_EXCL)
727 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
730 (p && check_loop(q, p, 0)))
732 atomic_inc(&q->refcnt);
738 /* This magic test requires explanation.
740 * We know that some child q is already
741 * attached to this parent and have a choice:
742 * either to change it or to create/graft a new one.
744 * 1. We are allowed to create/graft only
745 * if CREATE and REPLACE flags are set.
747 * 2. If EXCL is set, the requestor wanted to say
748 * that qdisc tcm_handle is not expected
749 * to exist, so we choose create/graft too.
751 * 3. The last case is when no flags are set.
752 * Alas, it is sort of a hole in the API: we
753 * cannot decide what to do unambiguously.
754 * For now we select create/graft if the
755 * user gave a KIND which does not match the existing one.
757 if ((n->nlmsg_flags&NLM_F_CREATE) &&
758 (n->nlmsg_flags&NLM_F_REPLACE) &&
759 ((n->nlmsg_flags&NLM_F_EXCL) ||
761 rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
766 if (!tcm->tcm_handle)
768 q = qdisc_lookup(dev, tcm->tcm_handle);
771 /* Change qdisc parameters */
774 if (n->nlmsg_flags&NLM_F_EXCL)
776 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
778 err = qdisc_change(q, tca);
780 qdisc_notify(skb, n, clid, NULL, q);
/* Create path: build the new qdisc, with the ingress pseudo-handle
 * coming from tcm_parent rather than tcm_handle. */
784 if (!(n->nlmsg_flags&NLM_F_CREATE))
786 if (clid == TC_H_INGRESS)
787 q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
789 q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
/* Graft the new qdisc into place; the displaced one is notified and
 * destroyed under the device queue lock. */
798 struct Qdisc *old_q = NULL;
799 err = qdisc_graft(dev, p, clid, q, &old_q);
802 spin_lock_bh(&dev->queue_lock);
804 spin_unlock_bh(&dev->queue_lock);
808 qdisc_notify(skb, n, clid, old_q, q);
810 spin_lock_bh(&dev->queue_lock);
811 qdisc_destroy(old_q);
812 spin_unlock_bh(&dev->queue_lock);
/* Serialize one qdisc into a netlink message: tcmsg header, TCA_KIND,
 * the qdisc's own ->dump attributes, then the gnet stats blocks
 * (basic / rate-estimator / queue and optional ->dump_stats extras).
 * Error labels (nlmsg_failure/rtattr_failure) are elided from this
 * excerpt. */
818 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
819 u32 pid, u32 seq, u16 flags, int event)
822 struct nlmsghdr *nlh;
823 unsigned char *b = skb_tail_pointer(skb);
826 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
827 tcm = NLMSG_DATA(nlh);
828 tcm->tcm_family = AF_UNSPEC;
831 tcm->tcm_ifindex = q->dev->ifindex;
832 tcm->tcm_parent = clid;
833 tcm->tcm_handle = q->handle;
834 tcm->tcm_info = atomic_read(&q->refcnt);
835 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
836 if (q->ops->dump && q->ops->dump(q, skb) < 0)
838 q->qstats.qlen = q->q.qlen;
840 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
841 TCA_XSTATS, q->stats_lock, &d) < 0)
844 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
847 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
848 #ifdef CONFIG_NET_ESTIMATOR
849 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
851 gnet_stats_copy_queue(&d, &q->qstats) < 0)
854 if (gnet_stats_finish_copy(&d) < 0)
/* Fix up the final message length now that all attributes are in. */
857 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
/* Broadcast a qdisc change on the RTNLGRP_TC group: a DELQDISC record
 * for the old qdisc (if it had a real handle) and/or a NEWQDISC record
 * for the new one, echoing back to the requester when NLM_F_ECHO is set. */
866 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
867 u32 clid, struct Qdisc *old, struct Qdisc *new)
870 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
872 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
876 if (old && old->handle) {
877 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
881 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
886 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
/* Netlink dump callback for RTM_GETQDISC: iterate all devices and each
 * device's qdisc list, emitting one NEWQDISC record per qdisc. Resume
 * state (device index / qdisc index) lives in cb->args so a partial
 * dump can continue where it left off. */
893 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
897 struct net_device *dev;
901 s_q_idx = q_idx = cb->args[1];
902 read_lock(&dev_base_lock);
903 for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
908 read_lock(&qdisc_tree_lock);
910 list_for_each_entry(q, &dev->qdisc_list, list) {
911 if (q_idx < s_q_idx) {
915 if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
916 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
/* skb full: stop here, cb->args records the resume point. */
917 read_unlock(&qdisc_tree_lock);
922 read_unlock(&qdisc_tree_lock);
926 read_unlock(&dev_base_lock);
936 /************************************************
937 * Traffic classes manipulation. *
938 ************************************************/
/* RTM_{NEW,DEL,GET}TCLASS handler: resolve the owning qdisc from the
 * parent/handle majors, look up (or create via ->change) the class, and
 * dispatch on nlmsg_type. */
942 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
944 struct tcmsg *tcm = NLMSG_DATA(n);
945 struct rtattr **tca = arg;
946 struct net_device *dev;
947 struct Qdisc *q = NULL;
948 struct Qdisc_class_ops *cops;
949 unsigned long cl = 0;
950 unsigned long new_cl;
951 u32 pid = tcm->tcm_parent;
952 u32 clid = tcm->tcm_handle;
953 u32 qid = TC_H_MAJ(clid);
956 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
/* Interpretation of the tcmsg handle fields:
960 parent == TC_H_UNSPEC - unspecified parent.
961 parent == TC_H_ROOT - class is root, which has no parent.
962 parent == X:0 - parent is root class.
963 parent == X:Y - parent is a node in hierarchy.
964 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
966 handle == 0:0 - generate handle from kernel pool.
967 handle == 0:Y - class is X:Y, where X:0 is qdisc.
968 handle == X:Y - clear.
969 handle == X:0 - root class.
*/
972 /* Step 1. Determine qdisc handle X:0 */
974 if (pid != TC_H_ROOT) {
975 u32 qid1 = TC_H_MAJ(pid);
978 /* If both majors are known, they must be identical. */
984 qid = dev->qdisc_sleeping->handle;
986 /* Now qid is genuine qdisc handle consistent
987 both with parent and child.
989 TC_H_MAJ(pid) still may be unspecified, complete it now.
*/
992 pid = TC_H_MAKE(qid, pid);
995 qid = dev->qdisc_sleeping->handle;
998 /* OK. Locate qdisc */
999 if ((q = qdisc_lookup(dev, qid)) == NULL)
1002 /* And check that it supports classes */
1003 cops = q->ops->cl_ops;
1007 /* Now try to get class */
1009 if (pid == TC_H_ROOT)
1012 clid = TC_H_MAKE(qid, clid);
1015 cl = cops->get(q, clid);
/* Class not found: only NEWTCLASS with NLM_F_CREATE may proceed. */
1019 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1022 switch (n->nlmsg_type) {
1025 if (n->nlmsg_flags&NLM_F_EXCL)
1029 err = cops->delete(q, cl);
1031 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1034 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
/* Create/change path: the qdisc's ->change op does the real work and
 * returns the (possibly new) class in new_cl. */
1043 err = cops->change(q, clid, pid, tca, &new_cl);
1045 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
/* Serialize one traffic class into a netlink message: tcmsg header,
 * TCA_KIND, the class's own ->dump attributes, then its gnet stats.
 * NOTE(review): tcm_parent and tcm_handle are both set to q->handle
 * here before cl_ops->dump() runs — presumably ->dump() overwrites
 * tcm_handle with the real class id; confirm against class implementations. */
1055 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1057 u32 pid, u32 seq, u16 flags, int event)
1060 struct nlmsghdr *nlh;
1061 unsigned char *b = skb_tail_pointer(skb);
1063 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1065 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1066 tcm = NLMSG_DATA(nlh);
1067 tcm->tcm_family = AF_UNSPEC;
1068 tcm->tcm_ifindex = q->dev->ifindex;
1069 tcm->tcm_parent = q->handle;
1070 tcm->tcm_handle = q->handle;
1072 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
1073 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1074 goto rtattr_failure;
1076 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
1077 TCA_XSTATS, q->stats_lock, &d) < 0)
1078 goto rtattr_failure;
1080 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1081 goto rtattr_failure;
1083 if (gnet_stats_finish_copy(&d) < 0)
1084 goto rtattr_failure;
/* Fix up the final message length now that all attributes are in. */
1086 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
/* Broadcast a class event (@event is RTM_NEWTCLASS or RTM_DELTCLASS)
 * on the RTNLGRP_TC group, echoing when NLM_F_ECHO is set. */
1095 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1096 struct Qdisc *q, unsigned long cl, int event)
1098 struct sk_buff *skb;
1099 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1101 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1105 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1110 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
/* Walker state for dumping all classes of a qdisc: the generic walker
 * plus the destination skb and the netlink dump callback context. */
1113 struct qdisc_dump_args
1115 struct qdisc_walker w;
1116 struct sk_buff *skb;
1117 struct netlink_callback *cb;
/* Per-class walk callback: emit one RTM_NEWTCLASS record for class @cl
 * into the dump skb. */
1120 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1122 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1124 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1125 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
/* Netlink dump callback for RTM_GETTCLASS: for the requested device,
 * walk every classful qdisc (optionally filtered by tcm_parent major)
 * and dump its classes via qdisc_class_dump(). cb->args[0] holds the
 * qdisc index to resume at, cb->args[1] the class count within it. */
1128 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1132 struct net_device *dev;
1134 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1135 struct qdisc_dump_args arg;
1137 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1139 if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
1145 read_lock(&qdisc_tree_lock);
1146 list_for_each_entry(q, &dev->qdisc_list, list) {
1147 if (t < s_t || !q->ops->cl_ops ||
1149 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
/* Entering a new qdisc: reset the per-qdisc resume state. */
1154 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1155 arg.w.fn = qdisc_class_dump;
1159 arg.w.skip = cb->args[1];
1161 q->ops->cl_ops->walk(q, &arg.w);
1162 cb->args[1] = arg.w.count;
1167 read_unlock(&qdisc_tree_lock);
1175 /* Main classifier routine: scans classifier chain attached
1176 to this qdisc, (optionally) tests for protocol and asks
1177 specific classifiers.
 * With CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY verdict restarts the
 * scan (restart label elided in this excerpt), bounded by MAX_REC_LOOP
 * to defuse buggy reclassify loops. */
1179 int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
1180 struct tcf_result *res)
1183 __be16 protocol = skb->protocol;
1184 #ifdef CONFIG_NET_CLS_ACT
1185 struct tcf_proto *otp = tp;
1188 protocol = skb->protocol;
/* First classifier matching the packet's protocol (or ETH_P_ALL) that
 * returns a non-negative verdict wins. */
1190 for ( ; tp; tp = tp->next) {
1191 if ((tp->protocol == protocol ||
1192 tp->protocol == htons(ETH_P_ALL)) &&
1193 (err = tp->classify(skb, tp, res)) >= 0) {
1194 #ifdef CONFIG_NET_CLS_ACT
1195 if ( TC_ACT_RECLASSIFY == err) {
1196 __u32 verd = (__u32) G_TC_VERD(skb->tc_verd);
1199 if (MAX_REC_LOOP < verd++) {
1200 printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
1201 tp->prio&0xffff, ntohs(tp->protocol));
1204 skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd);
/* Final (non-reclassify) verdict: clear the recursion counter. */
1208 skb->tc_verd = SET_TC_VERD(skb->tc_verd,0);
/* Destroy one classifier and drop the module reference its ops hold. */
1221 void tcf_destroy(struct tcf_proto *tp)
1223 tp->ops->destroy(tp);
1224 module_put(tp->ops->owner);
/* Destroy an entire classifier chain, head first (the advance of fl to
 * tp->next before tcf_destroy(tp) is elided from this excerpt). */
1228 void tcf_destroy_chain(struct tcf_proto *fl)
1230 struct tcf_proto *tp;
1232 while ((tp = fl) != NULL) {
1237 EXPORT_SYMBOL(tcf_destroy_chain);
1239 #ifdef CONFIG_PROC_FS
/* /proc/net/psched contents: four hex words describing the psched clock
 * (tick resolution and monotonic timer resolution). */
1240 static int psched_show(struct seq_file *seq, void *v)
1242 seq_printf(seq, "%08x %08x %08x %08x\n",
1243 (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
1245 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));
/* seq_file open hook for /proc/net/psched. */
1250 static int psched_open(struct inode *inode, struct file *file)
1252 return single_open(file, psched_show, PDE(inode)->data);
/* File operations for the /proc/net/psched entry (single_open-based). */
1255 static const struct file_operations psched_fops = {
1256 .owner = THIS_MODULE,
1257 .open = psched_open,
1259 .llseek = seq_lseek,
1260 .release = single_release,
/* Subsystem init: register the built-in fifo qdiscs, create the
 * /proc/net/psched entry, and hook the qdisc/class message types into
 * rtnetlink. */
1264 static int __init pktsched_init(void)
1266 register_qdisc(&pfifo_qdisc_ops);
1267 register_qdisc(&bfifo_qdisc_ops);
1268 proc_net_fops_create("psched", 0, &psched_fops);
1270 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1271 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1272 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1273 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1274 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1275 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1280 subsys_initcall(pktsched_init);
/* Symbols used by out-of-tree and modular qdisc/classifier implementations. */
1282 EXPORT_SYMBOL(qdisc_get_rtab);
1283 EXPORT_SYMBOL(qdisc_put_rtab);
1284 EXPORT_SYMBOL(register_qdisc);
1285 EXPORT_SYMBOL(unregister_qdisc);
1286 EXPORT_SYMBOL(tc_classify);