net: sched: cls: add extack support for tcf_change_indev
[linux-2.6-block.git] / net / sched / sch_api.c
1 /*
2  * net/sched/sch_api.c  Packet scheduler API.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Fixes:
12  *
13  * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14  * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15  * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16  */
17
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
22 #include <linux/errno.h>
23 #include <linux/skbuff.h>
24 #include <linux/init.h>
25 #include <linux/proc_fs.h>
26 #include <linux/seq_file.h>
27 #include <linux/kmod.h>
28 #include <linux/list.h>
29 #include <linux/hrtimer.h>
30 #include <linux/lockdep.h>
31 #include <linux/slab.h>
32 #include <linux/hashtable.h>
33
34 #include <net/net_namespace.h>
35 #include <net/sock.h>
36 #include <net/netlink.h>
37 #include <net/pkt_sched.h>
38 #include <net/pkt_cls.h>
39
40 /*
41
42    Short review.
43    -------------
44
45    This file consists of two interrelated parts:
46
47    1. queueing disciplines manager frontend.
48    2. traffic classes manager frontend.
49
50    Generally, queueing discipline ("qdisc") is a black box,
51    which is able to enqueue packets and to dequeue them (when
52    device is ready to send something) in order and at times
53    determined by algorithm hidden in it.
54
55    qdiscs are divided into two categories:
56    - "queues", which have no internal structure visible from outside.
57    - "schedulers", which split all the packets into "traffic classes",
58      using "packet classifiers" (look at cls_api.c)
59
60    In turn, classes may have child qdiscs (as rule, queues)
61    attached to them etc. etc. etc.
62
63    The goal of the routines in this file is to translate
64    information supplied by the user in the form of handles
65    into a form more intelligible to the kernel, to perform some
66    sanity checks and the part of the work which is common to all
67    qdiscs, and to provide rtnetlink notifications.
68
69    All real intelligent work is done inside qdisc modules.
70
71
72
73    Every discipline has two major routines: enqueue and dequeue.
74
75    ---dequeue
76
77    dequeue usually returns a skb to send. It is allowed to return NULL,
78    but it does not mean that queue is empty, it just means that
79    discipline does not want to send anything this time.
80    Queue is really empty if q->q.qlen == 0.
81    For complicated disciplines with multiple queues q->q is not
82    real packet queue, but however q->q.qlen must be valid.
83
84    ---enqueue
85
86    enqueue returns 0 if the packet was enqueued successfully.
87    If a packet (this one or another one) was dropped, it returns
88    a non-zero error code.
89    NET_XMIT_DROP        - this packet dropped
90      Expected action: do not backoff, but wait until queue will clear.
91    NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
92      Expected action: backoff or ignore
93
94    Auxiliary routines:
95
96    ---peek
97
98    like dequeue but without removing a packet from the queue
99
100    ---reset
101
102    returns qdisc to initial state: purge all buffers, clear all
103    timers, counters (except for statistics) etc.
104
105    ---init
106
107    initializes newly created qdisc.
108
109    ---destroy
110
111    destroys resources allocated by init and during lifetime of qdisc.
112
113    ---change
114
115    changes qdisc parameters.
116  */
117
118 /* Protects the list of registered TC modules (qdisc_base); the
 * default_qdisc_ops pointer is also read and updated under it.
 * It is a pure SMP lock.
 */
119 static DEFINE_RWLOCK(qdisc_mod_lock);
120
121
122 /************************************************
123  *      Queueing disciplines manipulation.      *
124  ************************************************/
125
126
127 /* Head of the list of all installed queueing disciplines, chained via
 * Qdisc_ops->next. Walked and modified under qdisc_mod_lock.
 */
128
129 static struct Qdisc_ops *qdisc_base;
130
131 /* Register/unregister queueing discipline */
132
133 int register_qdisc(struct Qdisc_ops *qops)
134 {
135         struct Qdisc_ops *q, **qp;
136         int rc = -EEXIST;
137
138         write_lock(&qdisc_mod_lock);
139         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
140                 if (!strcmp(qops->id, q->id))
141                         goto out;
142
143         if (qops->enqueue == NULL)
144                 qops->enqueue = noop_qdisc_ops.enqueue;
145         if (qops->peek == NULL) {
146                 if (qops->dequeue == NULL)
147                         qops->peek = noop_qdisc_ops.peek;
148                 else
149                         goto out_einval;
150         }
151         if (qops->dequeue == NULL)
152                 qops->dequeue = noop_qdisc_ops.dequeue;
153
154         if (qops->cl_ops) {
155                 const struct Qdisc_class_ops *cops = qops->cl_ops;
156
157                 if (!(cops->find && cops->walk && cops->leaf))
158                         goto out_einval;
159
160                 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
161                         goto out_einval;
162         }
163
164         qops->next = NULL;
165         *qp = qops;
166         rc = 0;
167 out:
168         write_unlock(&qdisc_mod_lock);
169         return rc;
170
171 out_einval:
172         rc = -EINVAL;
173         goto out;
174 }
175 EXPORT_SYMBOL(register_qdisc);
176
177 int unregister_qdisc(struct Qdisc_ops *qops)
178 {
179         struct Qdisc_ops *q, **qp;
180         int err = -ENOENT;
181
182         write_lock(&qdisc_mod_lock);
183         for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
184                 if (q == qops)
185                         break;
186         if (q) {
187                 *qp = q->next;
188                 q->next = NULL;
189                 err = 0;
190         }
191         write_unlock(&qdisc_mod_lock);
192         return err;
193 }
194 EXPORT_SYMBOL(unregister_qdisc);
195
196 /* Get default qdisc if not otherwise specified */
197 void qdisc_get_default(char *name, size_t len)
198 {
199         read_lock(&qdisc_mod_lock);
200         strlcpy(name, default_qdisc_ops->id, len);
201         read_unlock(&qdisc_mod_lock);
202 }
203
204 static struct Qdisc_ops *qdisc_lookup_default(const char *name)
205 {
206         struct Qdisc_ops *q = NULL;
207
208         for (q = qdisc_base; q; q = q->next) {
209                 if (!strcmp(name, q->id)) {
210                         if (!try_module_get(q->owner))
211                                 q = NULL;
212                         break;
213                 }
214         }
215
216         return q;
217 }
218
219 /* Set new default qdisc to use */
220 int qdisc_set_default(const char *name)
221 {
222         const struct Qdisc_ops *ops;
223
224         if (!capable(CAP_NET_ADMIN))
225                 return -EPERM;
226
227         write_lock(&qdisc_mod_lock);
228         ops = qdisc_lookup_default(name);
229         if (!ops) {
230                 /* Not found, drop lock and try to load module */
231                 write_unlock(&qdisc_mod_lock);
232                 request_module("sch_%s", name);
233                 write_lock(&qdisc_mod_lock);
234
235                 ops = qdisc_lookup_default(name);
236         }
237
238         if (ops) {
239                 /* Set new default */
240                 module_put(default_qdisc_ops->owner);
241                 default_qdisc_ops = ops;
242         }
243         write_unlock(&qdisc_mod_lock);
244
245         return ops ? 0 : -ENOENT;
246 }
247
#ifdef CONFIG_NET_SCH_DEFAULT
/* Late initcall: install the default qdisc named by the kernel config. */
static int __init sch_default_qdisc(void)
{
        int err = qdisc_set_default(CONFIG_DEFAULT_NET_SCH);

        return err;
}
late_initcall(sch_default_qdisc);
#endif
256
257 /* We know handle. Find qdisc among all qdisc's attached to device
258  * (root qdisc, all its children, children of children etc.)
259  * Note: caller either uses rtnl or rcu_read_lock()
260  */
261
262 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
263 {
264         struct Qdisc *q;
265
266         if (!qdisc_dev(root))
267                 return (root->handle == handle ? root : NULL);
268
269         if (!(root->flags & TCQ_F_BUILTIN) &&
270             root->handle == handle)
271                 return root;
272
273         hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
274                 if (q->handle == handle)
275                         return q;
276         }
277         return NULL;
278 }
279
280 void qdisc_hash_add(struct Qdisc *q, bool invisible)
281 {
282         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
283                 ASSERT_RTNL();
284                 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
285                 if (invisible)
286                         q->flags |= TCQ_F_INVISIBLE;
287         }
288 }
289 EXPORT_SYMBOL(qdisc_hash_add);
290
291 void qdisc_hash_del(struct Qdisc *q)
292 {
293         if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294                 ASSERT_RTNL();
295                 hash_del_rcu(&q->hash);
296         }
297 }
298 EXPORT_SYMBOL(qdisc_hash_del);
299
300 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
301 {
302         struct Qdisc *q;
303
304         if (!handle)
305                 return NULL;
306         q = qdisc_match_from_root(dev->qdisc, handle);
307         if (q)
308                 goto out;
309
310         if (dev_ingress_queue(dev))
311                 q = qdisc_match_from_root(
312                         dev_ingress_queue(dev)->qdisc_sleeping,
313                         handle);
314 out:
315         return q;
316 }
317
318 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
319 {
320         unsigned long cl;
321         struct Qdisc *leaf;
322         const struct Qdisc_class_ops *cops = p->ops->cl_ops;
323
324         if (cops == NULL)
325                 return NULL;
326         cl = cops->find(p, classid);
327
328         if (cl == 0)
329                 return NULL;
330         leaf = cops->leaf(p, cl);
331         return leaf;
332 }
333
334 /* Find queueing discipline by name */
335
336 static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
337 {
338         struct Qdisc_ops *q = NULL;
339
340         if (kind) {
341                 read_lock(&qdisc_mod_lock);
342                 for (q = qdisc_base; q; q = q->next) {
343                         if (nla_strcmp(kind, q->id) == 0) {
344                                 if (!try_module_get(q->owner))
345                                         q = NULL;
346                                 break;
347                         }
348                 }
349                 read_unlock(&qdisc_mod_lock);
350         }
351         return q;
352 }
353
354 /* The linklayer setting were not transferred from iproute2, in older
355  * versions, and the rate tables lookup systems have been dropped in
356  * the kernel. To keep backward compatible with older iproute2 tc
357  * utils, we detect the linklayer setting by detecting if the rate
358  * table were modified.
359  *
360  * For linklayer ATM table entries, the rate table will be aligned to
361  * 48 bytes, thus some table entries will contain the same value.  The
362  * mpu (min packet unit) is also encoded into the old rate table, thus
363  * starting from the mpu, we find low and high table entries for
364  * mapping this cell.  If these entries contain the same value, when
365  * the rate tables have been modified for linklayer ATM.
366  *
367  * This is done by rounding mpu to the nearest 48 bytes cell/entry,
368  * and then roundup to the next cell, calc the table entry one below,
369  * and compare.
370  */
371 static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
372 {
373         int low       = roundup(r->mpu, 48);
374         int high      = roundup(low+1, 48);
375         int cell_low  = low >> r->cell_log;
376         int cell_high = (high >> r->cell_log) - 1;
377
378         /* rtab is too inaccurate at rates > 100Mbit/s */
379         if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
380                 pr_debug("TC linklayer: Giving up ATM detection\n");
381                 return TC_LINKLAYER_ETHERNET;
382         }
383
384         if ((cell_high > cell_low) && (cell_high < 256)
385             && (rtab[cell_low] == rtab[cell_high])) {
386                 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
387                          cell_low, cell_high, rtab[cell_high]);
388                 return TC_LINKLAYER_ATM;
389         }
390         return TC_LINKLAYER_ETHERNET;
391 }
392
393 static struct qdisc_rate_table *qdisc_rtab_list;
394
395 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
396                                         struct nlattr *tab,
397                                         struct netlink_ext_ack *extack)
398 {
399         struct qdisc_rate_table *rtab;
400
401         if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
402             nla_len(tab) != TC_RTAB_SIZE) {
403                 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
404                 return NULL;
405         }
406
407         for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
408                 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
409                     !memcmp(&rtab->data, nla_data(tab), 1024)) {
410                         rtab->refcnt++;
411                         return rtab;
412                 }
413         }
414
415         rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
416         if (rtab) {
417                 rtab->rate = *r;
418                 rtab->refcnt = 1;
419                 memcpy(rtab->data, nla_data(tab), 1024);
420                 if (r->linklayer == TC_LINKLAYER_UNAWARE)
421                         r->linklayer = __detect_linklayer(r, rtab->data);
422                 rtab->next = qdisc_rtab_list;
423                 qdisc_rtab_list = rtab;
424         } else {
425                 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
426         }
427         return rtab;
428 }
429 EXPORT_SYMBOL(qdisc_get_rtab);
430
431 void qdisc_put_rtab(struct qdisc_rate_table *tab)
432 {
433         struct qdisc_rate_table *rtab, **rtabp;
434
435         if (!tab || --tab->refcnt)
436                 return;
437
438         for (rtabp = &qdisc_rtab_list;
439              (rtab = *rtabp) != NULL;
440              rtabp = &rtab->next) {
441                 if (rtab == tab) {
442                         *rtabp = rtab->next;
443                         kfree(rtab);
444                         return;
445                 }
446         }
447 }
448 EXPORT_SYMBOL(qdisc_put_rtab);
449
450 static LIST_HEAD(qdisc_stab_list);
451
452 static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
453         [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
454         [TCA_STAB_DATA] = { .type = NLA_BINARY },
455 };
456
457 static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
458                                                struct netlink_ext_ack *extack)
459 {
460         struct nlattr *tb[TCA_STAB_MAX + 1];
461         struct qdisc_size_table *stab;
462         struct tc_sizespec *s;
463         unsigned int tsize = 0;
464         u16 *tab = NULL;
465         int err;
466
467         err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
468         if (err < 0)
469                 return ERR_PTR(err);
470         if (!tb[TCA_STAB_BASE]) {
471                 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
472                 return ERR_PTR(-EINVAL);
473         }
474
475         s = nla_data(tb[TCA_STAB_BASE]);
476
477         if (s->tsize > 0) {
478                 if (!tb[TCA_STAB_DATA]) {
479                         NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
480                         return ERR_PTR(-EINVAL);
481                 }
482                 tab = nla_data(tb[TCA_STAB_DATA]);
483                 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
484         }
485
486         if (tsize != s->tsize || (!tab && tsize > 0)) {
487                 NL_SET_ERR_MSG(extack, "Invalid size of size table");
488                 return ERR_PTR(-EINVAL);
489         }
490
491         list_for_each_entry(stab, &qdisc_stab_list, list) {
492                 if (memcmp(&stab->szopts, s, sizeof(*s)))
493                         continue;
494                 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
495                         continue;
496                 stab->refcnt++;
497                 return stab;
498         }
499
500         stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
501         if (!stab)
502                 return ERR_PTR(-ENOMEM);
503
504         stab->refcnt = 1;
505         stab->szopts = *s;
506         if (tsize > 0)
507                 memcpy(stab->data, tab, tsize * sizeof(u16));
508
509         list_add_tail(&stab->list, &qdisc_stab_list);
510
511         return stab;
512 }
513
514 static void stab_kfree_rcu(struct rcu_head *head)
515 {
516         kfree(container_of(head, struct qdisc_size_table, rcu));
517 }
518
519 void qdisc_put_stab(struct qdisc_size_table *tab)
520 {
521         if (!tab)
522                 return;
523
524         if (--tab->refcnt == 0) {
525                 list_del(&tab->list);
526                 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
527         }
528 }
529 EXPORT_SYMBOL(qdisc_put_stab);
530
531 static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
532 {
533         struct nlattr *nest;
534
535         nest = nla_nest_start(skb, TCA_STAB);
536         if (nest == NULL)
537                 goto nla_put_failure;
538         if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
539                 goto nla_put_failure;
540         nla_nest_end(skb, nest);
541
542         return skb->len;
543
544 nla_put_failure:
545         return -1;
546 }
547
548 void __qdisc_calculate_pkt_len(struct sk_buff *skb,
549                                const struct qdisc_size_table *stab)
550 {
551         int pkt_len, slot;
552
553         pkt_len = skb->len + stab->szopts.overhead;
554         if (unlikely(!stab->szopts.tsize))
555                 goto out;
556
557         slot = pkt_len + stab->szopts.cell_align;
558         if (unlikely(slot < 0))
559                 slot = 0;
560
561         slot >>= stab->szopts.cell_log;
562         if (likely(slot < stab->szopts.tsize))
563                 pkt_len = stab->data[slot];
564         else
565                 pkt_len = stab->data[stab->szopts.tsize - 1] *
566                                 (slot / stab->szopts.tsize) +
567                                 stab->data[slot % stab->szopts.tsize];
568
569         pkt_len <<= stab->szopts.size_log;
570 out:
571         if (unlikely(pkt_len < 1))
572                 pkt_len = 1;
573         qdisc_skb_cb(skb)->pkt_len = pkt_len;
574 }
575 EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
576
577 void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
578 {
579         if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
580                 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
581                         txt, qdisc->ops->id, qdisc->handle >> 16);
582                 qdisc->flags |= TCQ_F_WARN_NONWC;
583         }
584 }
585 EXPORT_SYMBOL(qdisc_warn_nonwc);
586
587 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
588 {
589         struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
590                                                  timer);
591
592         rcu_read_lock();
593         __netif_schedule(qdisc_root(wd->qdisc));
594         rcu_read_unlock();
595
596         return HRTIMER_NORESTART;
597 }
598
599 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
600 {
601         hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
602         wd->timer.function = qdisc_watchdog;
603         wd->qdisc = qdisc;
604 }
605 EXPORT_SYMBOL(qdisc_watchdog_init);
606
607 void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
608 {
609         if (test_bit(__QDISC_STATE_DEACTIVATED,
610                      &qdisc_root_sleeping(wd->qdisc)->state))
611                 return;
612
613         if (wd->last_expires == expires)
614                 return;
615
616         wd->last_expires = expires;
617         hrtimer_start(&wd->timer,
618                       ns_to_ktime(expires),
619                       HRTIMER_MODE_ABS_PINNED);
620 }
621 EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
622
623 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
624 {
625         hrtimer_cancel(&wd->timer);
626 }
627 EXPORT_SYMBOL(qdisc_watchdog_cancel);
628
629 static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
630 {
631         struct hlist_head *h;
632         unsigned int i;
633
634         h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
635
636         if (h != NULL) {
637                 for (i = 0; i < n; i++)
638                         INIT_HLIST_HEAD(&h[i]);
639         }
640         return h;
641 }
642
643 void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
644 {
645         struct Qdisc_class_common *cl;
646         struct hlist_node *next;
647         struct hlist_head *nhash, *ohash;
648         unsigned int nsize, nmask, osize;
649         unsigned int i, h;
650
651         /* Rehash when load factor exceeds 0.75 */
652         if (clhash->hashelems * 4 <= clhash->hashsize * 3)
653                 return;
654         nsize = clhash->hashsize * 2;
655         nmask = nsize - 1;
656         nhash = qdisc_class_hash_alloc(nsize);
657         if (nhash == NULL)
658                 return;
659
660         ohash = clhash->hash;
661         osize = clhash->hashsize;
662
663         sch_tree_lock(sch);
664         for (i = 0; i < osize; i++) {
665                 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
666                         h = qdisc_class_hash(cl->classid, nmask);
667                         hlist_add_head(&cl->hnode, &nhash[h]);
668                 }
669         }
670         clhash->hash     = nhash;
671         clhash->hashsize = nsize;
672         clhash->hashmask = nmask;
673         sch_tree_unlock(sch);
674
675         kvfree(ohash);
676 }
677 EXPORT_SYMBOL(qdisc_class_hash_grow);
678
679 int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
680 {
681         unsigned int size = 4;
682
683         clhash->hash = qdisc_class_hash_alloc(size);
684         if (!clhash->hash)
685                 return -ENOMEM;
686         clhash->hashsize  = size;
687         clhash->hashmask  = size - 1;
688         clhash->hashelems = 0;
689         return 0;
690 }
691 EXPORT_SYMBOL(qdisc_class_hash_init);
692
693 void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
694 {
695         kvfree(clhash->hash);
696 }
697 EXPORT_SYMBOL(qdisc_class_hash_destroy);
698
699 void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
700                              struct Qdisc_class_common *cl)
701 {
702         unsigned int h;
703
704         INIT_HLIST_NODE(&cl->hnode);
705         h = qdisc_class_hash(cl->classid, clhash->hashmask);
706         hlist_add_head(&cl->hnode, &clhash->hash[h]);
707         clhash->hashelems++;
708 }
709 EXPORT_SYMBOL(qdisc_class_hash_insert);
710
711 void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
712                              struct Qdisc_class_common *cl)
713 {
714         hlist_del(&cl->hnode);
715         clhash->hashelems--;
716 }
717 EXPORT_SYMBOL(qdisc_class_hash_remove);
718
719 /* Allocate an unique handle from space managed by kernel
720  * Possible range is [8000-FFFF]:0000 (0x8000 values)
721  */
722 static u32 qdisc_alloc_handle(struct net_device *dev)
723 {
724         int i = 0x8000;
725         static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
726
727         do {
728                 autohandle += TC_H_MAKE(0x10000U, 0);
729                 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
730                         autohandle = TC_H_MAKE(0x80000000U, 0);
731                 if (!qdisc_lookup(dev, autohandle))
732                         return autohandle;
733                 cond_resched();
734         } while (--i > 0);
735
736         return 0;
737 }
738
739 void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
740                                unsigned int len)
741 {
742         const struct Qdisc_class_ops *cops;
743         unsigned long cl;
744         u32 parentid;
745         bool notify;
746         int drops;
747
748         if (n == 0 && len == 0)
749                 return;
750         drops = max_t(int, n, 0);
751         rcu_read_lock();
752         while ((parentid = sch->parent)) {
753                 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
754                         break;
755
756                 if (sch->flags & TCQ_F_NOPARENT)
757                         break;
758                 /* Notify parent qdisc only if child qdisc becomes empty.
759                  *
760                  * If child was empty even before update then backlog
761                  * counter is screwed and we skip notification because
762                  * parent class is already passive.
763                  */
764                 notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
765                 /* TODO: perform the search on a per txq basis */
766                 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
767                 if (sch == NULL) {
768                         WARN_ON_ONCE(parentid != TC_H_ROOT);
769                         break;
770                 }
771                 cops = sch->ops->cl_ops;
772                 if (notify && cops->qlen_notify) {
773                         cl = cops->find(sch, parentid);
774                         cops->qlen_notify(sch, cl);
775                 }
776                 sch->q.qlen -= n;
777                 sch->qstats.backlog -= len;
778                 __qdisc_qstats_drop(sch, drops);
779         }
780         rcu_read_unlock();
781 }
782 EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
783
784 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
785                          u32 portid, u32 seq, u16 flags, int event)
786 {
787         struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
788         struct gnet_stats_queue __percpu *cpu_qstats = NULL;
789         struct tcmsg *tcm;
790         struct nlmsghdr  *nlh;
791         unsigned char *b = skb_tail_pointer(skb);
792         struct gnet_dump d;
793         struct qdisc_size_table *stab;
794         u32 block_index;
795         __u32 qlen;
796
797         cond_resched();
798         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
799         if (!nlh)
800                 goto out_nlmsg_trim;
801         tcm = nlmsg_data(nlh);
802         tcm->tcm_family = AF_UNSPEC;
803         tcm->tcm__pad1 = 0;
804         tcm->tcm__pad2 = 0;
805         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
806         tcm->tcm_parent = clid;
807         tcm->tcm_handle = q->handle;
808         tcm->tcm_info = refcount_read(&q->refcnt);
809         if (nla_put_string(skb, TCA_KIND, q->ops->id))
810                 goto nla_put_failure;
811         if (q->ops->ingress_block_get) {
812                 block_index = q->ops->ingress_block_get(q);
813                 if (block_index &&
814                     nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
815                         goto nla_put_failure;
816         }
817         if (q->ops->egress_block_get) {
818                 block_index = q->ops->egress_block_get(q);
819                 if (block_index &&
820                     nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
821                         goto nla_put_failure;
822         }
823         if (q->ops->dump && q->ops->dump(q, skb) < 0)
824                 goto nla_put_failure;
825         if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
826                 goto nla_put_failure;
827         qlen = qdisc_qlen_sum(q);
828
829         stab = rtnl_dereference(q->stab);
830         if (stab && qdisc_dump_stab(skb, stab) < 0)
831                 goto nla_put_failure;
832
833         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
834                                          NULL, &d, TCA_PAD) < 0)
835                 goto nla_put_failure;
836
837         if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
838                 goto nla_put_failure;
839
840         if (qdisc_is_percpu_stats(q)) {
841                 cpu_bstats = q->cpu_bstats;
842                 cpu_qstats = q->cpu_qstats;
843         }
844
845         if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
846                                   &d, cpu_bstats, &q->bstats) < 0 ||
847             gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
848             gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
849                 goto nla_put_failure;
850
851         if (gnet_stats_finish_copy(&d) < 0)
852                 goto nla_put_failure;
853
854         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
855         return skb->len;
856
857 out_nlmsg_trim:
858 nla_put_failure:
859         nlmsg_trim(skb, b);
860         return -1;
861 }
862
863 static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
864 {
865         if (q->flags & TCQ_F_BUILTIN)
866                 return true;
867         if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
868                 return true;
869
870         return false;
871 }
872
873 static int qdisc_notify(struct net *net, struct sk_buff *oskb,
874                         struct nlmsghdr *n, u32 clid,
875                         struct Qdisc *old, struct Qdisc *new)
876 {
877         struct sk_buff *skb;
878         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
879
880         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
881         if (!skb)
882                 return -ENOBUFS;
883
884         if (old && !tc_qdisc_dump_ignore(old, false)) {
885                 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
886                                   0, RTM_DELQDISC) < 0)
887                         goto err_out;
888         }
889         if (new && !tc_qdisc_dump_ignore(new, false)) {
890                 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
891                                   old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
892                         goto err_out;
893         }
894
895         if (skb->len)
896                 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
897                                       n->nlmsg_flags & NLM_F_ECHO);
898
899 err_out:
900         kfree_skb(skb);
901         return -EINVAL;
902 }
903
904 static void notify_and_destroy(struct net *net, struct sk_buff *skb,
905                                struct nlmsghdr *n, u32 clid,
906                                struct Qdisc *old, struct Qdisc *new)
907 {
908         if (new || old)
909                 qdisc_notify(net, skb, n, clid, old, new);
910
911         if (old)
912                 qdisc_destroy(old);
913 }
914
915 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
916  * to device "dev".
917  *
918  * When appropriate send a netlink notification using 'skb'
919  * and "n".
920  *
921  * On success, destroy old qdisc.
922  */
923
924 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
925                        struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
926                        struct Qdisc *new, struct Qdisc *old,
927                        struct netlink_ext_ack *extack)
928 {
929         struct Qdisc *q = old;
930         struct net *net = dev_net(dev);
931         int err = 0;
932
933         if (parent == NULL) {
934                 unsigned int i, num_q, ingress;
935
936                 ingress = 0;
937                 num_q = dev->num_tx_queues;
938                 if ((q && q->flags & TCQ_F_INGRESS) ||
939                     (new && new->flags & TCQ_F_INGRESS)) {
940                         num_q = 1;
941                         ingress = 1;
942                         if (!dev_ingress_queue(dev)) {
943                                 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
944                                 return -ENOENT;
945                         }
946                 }
947
948                 if (dev->flags & IFF_UP)
949                         dev_deactivate(dev);
950
951                 if (new && new->ops->attach)
952                         goto skip;
953
954                 for (i = 0; i < num_q; i++) {
955                         struct netdev_queue *dev_queue = dev_ingress_queue(dev);
956
957                         if (!ingress)
958                                 dev_queue = netdev_get_tx_queue(dev, i);
959
960                         old = dev_graft_qdisc(dev_queue, new);
961                         if (new && i > 0)
962                                 qdisc_refcount_inc(new);
963
964                         if (!ingress)
965                                 qdisc_destroy(old);
966                 }
967
968 skip:
969                 if (!ingress) {
970                         notify_and_destroy(net, skb, n, classid,
971                                            dev->qdisc, new);
972                         if (new && !new->ops->attach)
973                                 qdisc_refcount_inc(new);
974                         dev->qdisc = new ? : &noop_qdisc;
975
976                         if (new && new->ops->attach)
977                                 new->ops->attach(new);
978                 } else {
979                         notify_and_destroy(net, skb, n, classid, old, new);
980                 }
981
982                 if (dev->flags & IFF_UP)
983                         dev_activate(dev);
984         } else {
985                 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
986
987                 /* Only support running class lockless if parent is lockless */
988                 if (new && (new->flags & TCQ_F_NOLOCK) &&
989                     parent && !(parent->flags & TCQ_F_NOLOCK))
990                         new->flags &= ~TCQ_F_NOLOCK;
991
992                 err = -EOPNOTSUPP;
993                 if (cops && cops->graft) {
994                         unsigned long cl = cops->find(parent, classid);
995
996                         if (cl) {
997                                 err = cops->graft(parent, cl, new, &old,
998                                                   extack);
999                         } else {
1000                                 NL_SET_ERR_MSG(extack, "Specified class not found");
1001                                 err = -ENOENT;
1002                         }
1003                 }
1004                 if (!err)
1005                         notify_and_destroy(net, skb, n, classid, old, new);
1006         }
1007         return err;
1008 }
1009
1010 static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1011                                    struct netlink_ext_ack *extack)
1012 {
1013         u32 block_index;
1014
1015         if (tca[TCA_INGRESS_BLOCK]) {
1016                 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1017
1018                 if (!block_index) {
1019                         NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1020                         return -EINVAL;
1021                 }
1022                 if (!sch->ops->ingress_block_set) {
1023                         NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1024                         return -EOPNOTSUPP;
1025                 }
1026                 sch->ops->ingress_block_set(sch, block_index);
1027         }
1028         if (tca[TCA_EGRESS_BLOCK]) {
1029                 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1030
1031                 if (!block_index) {
1032                         NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1033                         return -EINVAL;
1034                 }
1035                 if (!sch->ops->egress_block_set) {
1036                         NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1037                         return -EOPNOTSUPP;
1038                 }
1039                 sch->ops->egress_block_set(sch, block_index);
1040         }
1041         return 0;
1042 }
1043
1044 /* lockdep annotation is needed for ingress; egress gets it only for name */
1045 static struct lock_class_key qdisc_tx_lock;
1046 static struct lock_class_key qdisc_rx_lock;
1047
1048 /*
1049    Allocate and initialize new qdisc.
1050
1051    Parameters are passed via opt.
1052  */
1053
1054 static struct Qdisc *qdisc_create(struct net_device *dev,
1055                                   struct netdev_queue *dev_queue,
1056                                   struct Qdisc *p, u32 parent, u32 handle,
1057                                   struct nlattr **tca, int *errp,
1058                                   struct netlink_ext_ack *extack)
1059 {
1060         int err;
1061         struct nlattr *kind = tca[TCA_KIND];
1062         struct Qdisc *sch;
1063         struct Qdisc_ops *ops;
1064         struct qdisc_size_table *stab;
1065
1066         ops = qdisc_lookup_ops(kind);
1067 #ifdef CONFIG_MODULES
1068         if (ops == NULL && kind != NULL) {
1069                 char name[IFNAMSIZ];
1070                 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1071                         /* We dropped the RTNL semaphore in order to
1072                          * perform the module load.  So, even if we
1073                          * succeeded in loading the module we have to
1074                          * tell the caller to replay the request.  We
1075                          * indicate this using -EAGAIN.
1076                          * We replay the request because the device may
1077                          * go away in the mean time.
1078                          */
1079                         rtnl_unlock();
1080                         request_module("sch_%s", name);
1081                         rtnl_lock();
1082                         ops = qdisc_lookup_ops(kind);
1083                         if (ops != NULL) {
1084                                 /* We will try again qdisc_lookup_ops,
1085                                  * so don't keep a reference.
1086                                  */
1087                                 module_put(ops->owner);
1088                                 err = -EAGAIN;
1089                                 goto err_out;
1090                         }
1091                 }
1092         }
1093 #endif
1094
1095         err = -ENOENT;
1096         if (!ops) {
1097                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1098                 goto err_out;
1099         }
1100
1101         sch = qdisc_alloc(dev_queue, ops, extack);
1102         if (IS_ERR(sch)) {
1103                 err = PTR_ERR(sch);
1104                 goto err_out2;
1105         }
1106
1107         sch->parent = parent;
1108
1109         if (handle == TC_H_INGRESS) {
1110                 sch->flags |= TCQ_F_INGRESS;
1111                 handle = TC_H_MAKE(TC_H_INGRESS, 0);
1112                 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
1113         } else {
1114                 if (handle == 0) {
1115                         handle = qdisc_alloc_handle(dev);
1116                         err = -ENOMEM;
1117                         if (handle == 0)
1118                                 goto err_out3;
1119                 }
1120                 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
1121                 if (!netif_is_multiqueue(dev))
1122                         sch->flags |= TCQ_F_ONETXQUEUE;
1123         }
1124
1125         sch->handle = handle;
1126
1127         /* This exist to keep backward compatible with a userspace
1128          * loophole, what allowed userspace to get IFF_NO_QUEUE
1129          * facility on older kernels by setting tx_queue_len=0 (prior
1130          * to qdisc init), and then forgot to reinit tx_queue_len
1131          * before again attaching a qdisc.
1132          */
1133         if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1134                 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1135                 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1136         }
1137
1138         err = qdisc_block_indexes_set(sch, tca, extack);
1139         if (err)
1140                 goto err_out3;
1141
1142         if (ops->init) {
1143                 err = ops->init(sch, tca[TCA_OPTIONS], extack);
1144                 if (err != 0)
1145                         goto err_out5;
1146         }
1147
1148         if (tca[TCA_STAB]) {
1149                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1150                 if (IS_ERR(stab)) {
1151                         err = PTR_ERR(stab);
1152                         goto err_out4;
1153                 }
1154                 rcu_assign_pointer(sch->stab, stab);
1155         }
1156         if (tca[TCA_RATE]) {
1157                 seqcount_t *running;
1158
1159                 err = -EOPNOTSUPP;
1160                 if (sch->flags & TCQ_F_MQROOT) {
1161                         NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
1162                         goto err_out4;
1163                 }
1164
1165                 if (sch->parent != TC_H_ROOT &&
1166                     !(sch->flags & TCQ_F_INGRESS) &&
1167                     (!p || !(p->flags & TCQ_F_MQROOT)))
1168                         running = qdisc_root_sleeping_running(sch);
1169                 else
1170                         running = &sch->running;
1171
1172                 err = gen_new_estimator(&sch->bstats,
1173                                         sch->cpu_bstats,
1174                                         &sch->rate_est,
1175                                         NULL,
1176                                         running,
1177                                         tca[TCA_RATE]);
1178                 if (err) {
1179                         NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
1180                         goto err_out4;
1181                 }
1182         }
1183
1184         qdisc_hash_add(sch, false);
1185
1186         return sch;
1187
1188 err_out5:
1189         /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
1190         if (ops->destroy)
1191                 ops->destroy(sch);
1192 err_out3:
1193         dev_put(dev);
1194         qdisc_free(sch);
1195 err_out2:
1196         module_put(ops->owner);
1197 err_out:
1198         *errp = err;
1199         return NULL;
1200
1201 err_out4:
1202         /*
1203          * Any broken qdiscs that would require a ops->reset() here?
1204          * The qdisc was never in action so it shouldn't be necessary.
1205          */
1206         qdisc_put_stab(rtnl_dereference(sch->stab));
1207         if (ops->destroy)
1208                 ops->destroy(sch);
1209         goto err_out3;
1210 }
1211
1212 static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1213                         struct netlink_ext_ack *extack)
1214 {
1215         struct qdisc_size_table *ostab, *stab = NULL;
1216         int err = 0;
1217
1218         if (tca[TCA_OPTIONS]) {
1219                 if (!sch->ops->change) {
1220                         NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1221                         return -EINVAL;
1222                 }
1223                 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1224                         NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1225                         return -EOPNOTSUPP;
1226                 }
1227                 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1228                 if (err)
1229                         return err;
1230         }
1231
1232         if (tca[TCA_STAB]) {
1233                 stab = qdisc_get_stab(tca[TCA_STAB], extack);
1234                 if (IS_ERR(stab))
1235                         return PTR_ERR(stab);
1236         }
1237
1238         ostab = rtnl_dereference(sch->stab);
1239         rcu_assign_pointer(sch->stab, stab);
1240         qdisc_put_stab(ostab);
1241
1242         if (tca[TCA_RATE]) {
1243                 /* NB: ignores errors from replace_estimator
1244                    because change can't be undone. */
1245                 if (sch->flags & TCQ_F_MQROOT)
1246                         goto out;
1247                 gen_replace_estimator(&sch->bstats,
1248                                       sch->cpu_bstats,
1249                                       &sch->rate_est,
1250                                       NULL,
1251                                       qdisc_root_sleeping_running(sch),
1252                                       tca[TCA_RATE]);
1253         }
1254 out:
1255         return 0;
1256 }
1257
1258 struct check_loop_arg {
1259         struct qdisc_walker     w;
1260         struct Qdisc            *p;
1261         int                     depth;
1262 };
1263
/* Declared ahead of check_loop(), which installs it as walker callback. */
static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
1266
1267 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1268 {
1269         struct check_loop_arg   arg;
1270
1271         if (q->ops->cl_ops == NULL)
1272                 return 0;
1273
1274         arg.w.stop = arg.w.skip = arg.w.count = 0;
1275         arg.w.fn = check_loop_fn;
1276         arg.depth = depth;
1277         arg.p = p;
1278         q->ops->cl_ops->walk(q, &arg.w);
1279         return arg.w.stop ? -ELOOP : 0;
1280 }
1281
1282 static int
1283 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1284 {
1285         struct Qdisc *leaf;
1286         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1287         struct check_loop_arg *arg = (struct check_loop_arg *)w;
1288
1289         leaf = cops->leaf(q, cl);
1290         if (leaf) {
1291                 if (leaf == arg->p || arg->depth > 7)
1292                         return -ELOOP;
1293                 return check_loop(leaf, arg->p, arg->depth + 1);
1294         }
1295         return 0;
1296 }
1297
1298 /*
1299  * Delete/get qdisc.
1300  */
1301
1302 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1303                         struct netlink_ext_ack *extack)
1304 {
1305         struct net *net = sock_net(skb->sk);
1306         struct tcmsg *tcm = nlmsg_data(n);
1307         struct nlattr *tca[TCA_MAX + 1];
1308         struct net_device *dev;
1309         u32 clid;
1310         struct Qdisc *q = NULL;
1311         struct Qdisc *p = NULL;
1312         int err;
1313
1314         if ((n->nlmsg_type != RTM_GETQDISC) &&
1315             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1316                 return -EPERM;
1317
1318         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1319         if (err < 0)
1320                 return err;
1321
1322         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1323         if (!dev)
1324                 return -ENODEV;
1325
1326         clid = tcm->tcm_parent;
1327         if (clid) {
1328                 if (clid != TC_H_ROOT) {
1329                         if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
1330                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1331                                 if (!p) {
1332                                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1333                                         return -ENOENT;
1334                                 }
1335                                 q = qdisc_leaf(p, clid);
1336                         } else if (dev_ingress_queue(dev)) {
1337                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1338                         }
1339                 } else {
1340                         q = dev->qdisc;
1341                 }
1342                 if (!q) {
1343                         NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1344                         return -ENOENT;
1345                 }
1346
1347                 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1348                         NL_SET_ERR_MSG(extack, "Invalid handle");
1349                         return -EINVAL;
1350                 }
1351         } else {
1352                 q = qdisc_lookup(dev, tcm->tcm_handle);
1353                 if (!q) {
1354                         NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1355                         return -ENOENT;
1356                 }
1357         }
1358
1359         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1360                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1361                 return -EINVAL;
1362         }
1363
1364         if (n->nlmsg_type == RTM_DELQDISC) {
1365                 if (!clid) {
1366                         NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1367                         return -EINVAL;
1368                 }
1369                 if (q->handle == 0) {
1370                         NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1371                         return -ENOENT;
1372                 }
1373                 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
1374                 if (err != 0)
1375                         return err;
1376         } else {
1377                 qdisc_notify(net, skb, n, clid, NULL, q);
1378         }
1379         return 0;
1380 }
1381
1382 /*
1383  * Create/change qdisc.
1384  */
1385
1386 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1387                            struct netlink_ext_ack *extack)
1388 {
1389         struct net *net = sock_net(skb->sk);
1390         struct tcmsg *tcm;
1391         struct nlattr *tca[TCA_MAX + 1];
1392         struct net_device *dev;
1393         u32 clid;
1394         struct Qdisc *q, *p;
1395         int err;
1396
1397         if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1398                 return -EPERM;
1399
1400 replay:
1401         /* Reinit, just in case something touches this. */
1402         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1403         if (err < 0)
1404                 return err;
1405
1406         tcm = nlmsg_data(n);
1407         clid = tcm->tcm_parent;
1408         q = p = NULL;
1409
1410         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1411         if (!dev)
1412                 return -ENODEV;
1413
1414
1415         if (clid) {
1416                 if (clid != TC_H_ROOT) {
1417                         if (clid != TC_H_INGRESS) {
1418                                 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1419                                 if (!p) {
1420                                         NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1421                                         return -ENOENT;
1422                                 }
1423                                 q = qdisc_leaf(p, clid);
1424                         } else if (dev_ingress_queue_create(dev)) {
1425                                 q = dev_ingress_queue(dev)->qdisc_sleeping;
1426                         }
1427                 } else {
1428                         q = dev->qdisc;
1429                 }
1430
1431                 /* It may be default qdisc, ignore it */
1432                 if (q && q->handle == 0)
1433                         q = NULL;
1434
1435                 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1436                         if (tcm->tcm_handle) {
1437                                 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1438                                         NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1439                                         return -EEXIST;
1440                                 }
1441                                 if (TC_H_MIN(tcm->tcm_handle)) {
1442                                         NL_SET_ERR_MSG(extack, "Invalid minor handle");
1443                                         return -EINVAL;
1444                                 }
1445                                 q = qdisc_lookup(dev, tcm->tcm_handle);
1446                                 if (!q)
1447                                         goto create_n_graft;
1448                                 if (n->nlmsg_flags & NLM_F_EXCL) {
1449                                         NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1450                                         return -EEXIST;
1451                                 }
1452                                 if (tca[TCA_KIND] &&
1453                                     nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1454                                         NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1455                                         return -EINVAL;
1456                                 }
1457                                 if (q == p ||
1458                                     (p && check_loop(q, p, 0))) {
1459                                         NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1460                                         return -ELOOP;
1461                                 }
1462                                 qdisc_refcount_inc(q);
1463                                 goto graft;
1464                         } else {
1465                                 if (!q)
1466                                         goto create_n_graft;
1467
1468                                 /* This magic test requires explanation.
1469                                  *
1470                                  *   We know, that some child q is already
1471                                  *   attached to this parent and have choice:
1472                                  *   either to change it or to create/graft new one.
1473                                  *
1474                                  *   1. We are allowed to create/graft only
1475                                  *   if CREATE and REPLACE flags are set.
1476                                  *
1477                                  *   2. If EXCL is set, requestor wanted to say,
1478                                  *   that qdisc tcm_handle is not expected
1479                                  *   to exist, so that we choose create/graft too.
1480                                  *
1481                                  *   3. The last case is when no flags are set.
1482                                  *   Alas, it is sort of hole in API, we
1483                                  *   cannot decide what to do unambiguously.
1484                                  *   For now we select create/graft, if
1485                                  *   user gave KIND, which does not match existing.
1486                                  */
1487                                 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1488                                     (n->nlmsg_flags & NLM_F_REPLACE) &&
1489                                     ((n->nlmsg_flags & NLM_F_EXCL) ||
1490                                      (tca[TCA_KIND] &&
1491                                       nla_strcmp(tca[TCA_KIND], q->ops->id))))
1492                                         goto create_n_graft;
1493                         }
1494                 }
1495         } else {
1496                 if (!tcm->tcm_handle) {
1497                         NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1498                         return -EINVAL;
1499                 }
1500                 q = qdisc_lookup(dev, tcm->tcm_handle);
1501         }
1502
1503         /* Change qdisc parameters */
1504         if (!q) {
1505                 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1506                 return -ENOENT;
1507         }
1508         if (n->nlmsg_flags & NLM_F_EXCL) {
1509                 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1510                 return -EEXIST;
1511         }
1512         if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1513                 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1514                 return -EINVAL;
1515         }
1516         err = qdisc_change(q, tca, extack);
1517         if (err == 0)
1518                 qdisc_notify(net, skb, n, clid, NULL, q);
1519         return err;
1520
1521 create_n_graft:
1522         if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1523                 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1524                 return -ENOENT;
1525         }
1526         if (clid == TC_H_INGRESS) {
1527                 if (dev_ingress_queue(dev)) {
1528                         q = qdisc_create(dev, dev_ingress_queue(dev), p,
1529                                          tcm->tcm_parent, tcm->tcm_parent,
1530                                          tca, &err, extack);
1531                 } else {
1532                         NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
1533                         err = -ENOENT;
1534                 }
1535         } else {
1536                 struct netdev_queue *dev_queue;
1537
1538                 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
1539                         dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1540                 else if (p)
1541                         dev_queue = p->dev_queue;
1542                 else
1543                         dev_queue = netdev_get_tx_queue(dev, 0);
1544
1545                 q = qdisc_create(dev, dev_queue, p,
1546                                  tcm->tcm_parent, tcm->tcm_handle,
1547                                  tca, &err, extack);
1548         }
1549         if (q == NULL) {
1550                 if (err == -EAGAIN)
1551                         goto replay;
1552                 return err;
1553         }
1554
1555 graft:
1556         err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
1557         if (err) {
1558                 if (q)
1559                         qdisc_destroy(q);
1560                 return err;
1561         }
1562
1563         return 0;
1564 }
1565
1566 static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1567                               struct netlink_callback *cb,
1568                               int *q_idx_p, int s_q_idx, bool recur,
1569                               bool dump_invisible)
1570 {
1571         int ret = 0, q_idx = *q_idx_p;
1572         struct Qdisc *q;
1573         int b;
1574
1575         if (!root)
1576                 return 0;
1577
1578         q = root;
1579         if (q_idx < s_q_idx) {
1580                 q_idx++;
1581         } else {
1582                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1583                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1584                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1585                                   RTM_NEWQDISC) <= 0)
1586                         goto done;
1587                 q_idx++;
1588         }
1589
1590         /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1591          * itself has already been dumped.
1592          *
1593          * If we've already dumped the top-level (ingress) qdisc above and the global
1594          * qdisc hashtable, we don't want to hit it again
1595          */
1596         if (!qdisc_dev(root) || !recur)
1597                 goto out;
1598
1599         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
1600                 if (q_idx < s_q_idx) {
1601                         q_idx++;
1602                         continue;
1603                 }
1604                 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
1605                     tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
1606                                   cb->nlh->nlmsg_seq, NLM_F_MULTI,
1607                                   RTM_NEWQDISC) <= 0)
1608                         goto done;
1609                 q_idx++;
1610         }
1611
1612 out:
1613         *q_idx_p = q_idx;
1614         return ret;
1615 done:
1616         ret = -1;
1617         goto out;
1618 }
1619
1620 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1621 {
1622         struct net *net = sock_net(skb->sk);
1623         int idx, q_idx;
1624         int s_idx, s_q_idx;
1625         struct net_device *dev;
1626         const struct nlmsghdr *nlh = cb->nlh;
1627         struct nlattr *tca[TCA_MAX + 1];
1628         int err;
1629
1630         s_idx = cb->args[0];
1631         s_q_idx = q_idx = cb->args[1];
1632
1633         idx = 0;
1634         ASSERT_RTNL();
1635
1636         err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
1637         if (err < 0)
1638                 return err;
1639
1640         for_each_netdev(net, dev) {
1641                 struct netdev_queue *dev_queue;
1642
1643                 if (idx < s_idx)
1644                         goto cont;
1645                 if (idx > s_idx)
1646                         s_q_idx = 0;
1647                 q_idx = 0;
1648
1649                 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
1650                                        true, tca[TCA_DUMP_INVISIBLE]) < 0)
1651                         goto done;
1652
1653                 dev_queue = dev_ingress_queue(dev);
1654                 if (dev_queue &&
1655                     tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1656                                        &q_idx, s_q_idx, false,
1657                                        tca[TCA_DUMP_INVISIBLE]) < 0)
1658                         goto done;
1659
1660 cont:
1661                 idx++;
1662         }
1663
1664 done:
1665         cb->args[0] = idx;
1666         cb->args[1] = q_idx;
1667
1668         return skb->len;
1669 }
1670
1671
1672
1673 /************************************************
1674  *      Traffic classes manipulation.           *
1675  ************************************************/
1676
1677 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1678                           unsigned long cl,
1679                           u32 portid, u32 seq, u16 flags, int event)
1680 {
1681         struct tcmsg *tcm;
1682         struct nlmsghdr  *nlh;
1683         unsigned char *b = skb_tail_pointer(skb);
1684         struct gnet_dump d;
1685         const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1686
1687         cond_resched();
1688         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1689         if (!nlh)
1690                 goto out_nlmsg_trim;
1691         tcm = nlmsg_data(nlh);
1692         tcm->tcm_family = AF_UNSPEC;
1693         tcm->tcm__pad1 = 0;
1694         tcm->tcm__pad2 = 0;
1695         tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1696         tcm->tcm_parent = q->handle;
1697         tcm->tcm_handle = q->handle;
1698         tcm->tcm_info = 0;
1699         if (nla_put_string(skb, TCA_KIND, q->ops->id))
1700                 goto nla_put_failure;
1701         if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1702                 goto nla_put_failure;
1703
1704         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1705                                          NULL, &d, TCA_PAD) < 0)
1706                 goto nla_put_failure;
1707
1708         if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1709                 goto nla_put_failure;
1710
1711         if (gnet_stats_finish_copy(&d) < 0)
1712                 goto nla_put_failure;
1713
1714         nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1715         return skb->len;
1716
1717 out_nlmsg_trim:
1718 nla_put_failure:
1719         nlmsg_trim(skb, b);
1720         return -1;
1721 }
1722
1723 static int tclass_notify(struct net *net, struct sk_buff *oskb,
1724                          struct nlmsghdr *n, struct Qdisc *q,
1725                          unsigned long cl, int event)
1726 {
1727         struct sk_buff *skb;
1728         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1729
1730         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1731         if (!skb)
1732                 return -ENOBUFS;
1733
1734         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1735                 kfree_skb(skb);
1736                 return -EINVAL;
1737         }
1738
1739         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1740                               n->nlmsg_flags & NLM_F_ECHO);
1741 }
1742
1743 static int tclass_del_notify(struct net *net,
1744                              const struct Qdisc_class_ops *cops,
1745                              struct sk_buff *oskb, struct nlmsghdr *n,
1746                              struct Qdisc *q, unsigned long cl)
1747 {
1748         u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1749         struct sk_buff *skb;
1750         int err = 0;
1751
1752         if (!cops->delete)
1753                 return -EOPNOTSUPP;
1754
1755         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1756         if (!skb)
1757                 return -ENOBUFS;
1758
1759         if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1760                            RTM_DELTCLASS) < 0) {
1761                 kfree_skb(skb);
1762                 return -EINVAL;
1763         }
1764
1765         err = cops->delete(q, cl);
1766         if (err) {
1767                 kfree_skb(skb);
1768                 return err;
1769         }
1770
1771         return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1772                               n->nlmsg_flags & NLM_F_ECHO);
1773 }
1774
1775 #ifdef CONFIG_NET_CLS
1776
1777 struct tcf_bind_args {
1778         struct tcf_walker w;
1779         u32 classid;
1780         unsigned long cl;
1781 };
1782
1783 static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1784 {
1785         struct tcf_bind_args *a = (void *)arg;
1786
1787         if (tp->ops->bind_class) {
1788                 struct Qdisc *q = tcf_block_q(tp->chain->block);
1789
1790                 sch_tree_lock(q);
1791                 tp->ops->bind_class(n, a->classid, a->cl);
1792                 sch_tree_unlock(q);
1793         }
1794         return 0;
1795 }
1796
1797 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1798                            unsigned long new_cl)
1799 {
1800         const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1801         struct tcf_block *block;
1802         struct tcf_chain *chain;
1803         unsigned long cl;
1804
1805         cl = cops->find(q, portid);
1806         if (!cl)
1807                 return;
1808         block = cops->tcf_block(q, cl, NULL);
1809         if (!block)
1810                 return;
1811         list_for_each_entry(chain, &block->chain_list, list) {
1812                 struct tcf_proto *tp;
1813
1814                 for (tp = rtnl_dereference(chain->filter_chain);
1815                      tp; tp = rtnl_dereference(tp->next)) {
1816                         struct tcf_bind_args arg = {};
1817
1818                         arg.w.fn = tcf_node_bind;
1819                         arg.classid = clid;
1820                         arg.cl = new_cl;
1821                         tp->ops->walk(tp, &arg.w);
1822                 }
1823         }
1824 }
1825
1826 #else
1827
1828 static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1829                            unsigned long new_cl)
1830 {
1831 }
1832
1833 #endif
1834
1835 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1836                          struct netlink_ext_ack *extack)
1837 {
1838         struct net *net = sock_net(skb->sk);
1839         struct tcmsg *tcm = nlmsg_data(n);
1840         struct nlattr *tca[TCA_MAX + 1];
1841         struct net_device *dev;
1842         struct Qdisc *q = NULL;
1843         const struct Qdisc_class_ops *cops;
1844         unsigned long cl = 0;
1845         unsigned long new_cl;
1846         u32 portid;
1847         u32 clid;
1848         u32 qid;
1849         int err;
1850
1851         if ((n->nlmsg_type != RTM_GETTCLASS) &&
1852             !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1853                 return -EPERM;
1854
1855         err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1856         if (err < 0)
1857                 return err;
1858
1859         dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1860         if (!dev)
1861                 return -ENODEV;
1862
1863         /*
1864            parent == TC_H_UNSPEC - unspecified parent.
1865            parent == TC_H_ROOT   - class is root, which has no parent.
1866            parent == X:0         - parent is root class.
1867            parent == X:Y         - parent is a node in hierarchy.
1868            parent == 0:Y         - parent is X:Y, where X:0 is qdisc.
1869
1870            handle == 0:0         - generate handle from kernel pool.
1871            handle == 0:Y         - class is X:Y, where X:0 is qdisc.
1872            handle == X:Y         - clear.
1873            handle == X:0         - root class.
1874          */
1875
1876         /* Step 1. Determine qdisc handle X:0 */
1877
1878         portid = tcm->tcm_parent;
1879         clid = tcm->tcm_handle;
1880         qid = TC_H_MAJ(clid);
1881
1882         if (portid != TC_H_ROOT) {
1883                 u32 qid1 = TC_H_MAJ(portid);
1884
1885                 if (qid && qid1) {
1886                         /* If both majors are known, they must be identical. */
1887                         if (qid != qid1)
1888                                 return -EINVAL;
1889                 } else if (qid1) {
1890                         qid = qid1;
1891                 } else if (qid == 0)
1892                         qid = dev->qdisc->handle;
1893
1894                 /* Now qid is genuine qdisc handle consistent
1895                  * both with parent and child.
1896                  *
1897                  * TC_H_MAJ(portid) still may be unspecified, complete it now.
1898                  */
1899                 if (portid)
1900                         portid = TC_H_MAKE(qid, portid);
1901         } else {
1902                 if (qid == 0)
1903                         qid = dev->qdisc->handle;
1904         }
1905
1906         /* OK. Locate qdisc */
1907         q = qdisc_lookup(dev, qid);
1908         if (!q)
1909                 return -ENOENT;
1910
1911         /* An check that it supports classes */
1912         cops = q->ops->cl_ops;
1913         if (cops == NULL)
1914                 return -EINVAL;
1915
1916         /* Now try to get class */
1917         if (clid == 0) {
1918                 if (portid == TC_H_ROOT)
1919                         clid = qid;
1920         } else
1921                 clid = TC_H_MAKE(qid, clid);
1922
1923         if (clid)
1924                 cl = cops->find(q, clid);
1925
1926         if (cl == 0) {
1927                 err = -ENOENT;
1928                 if (n->nlmsg_type != RTM_NEWTCLASS ||
1929                     !(n->nlmsg_flags & NLM_F_CREATE))
1930                         goto out;
1931         } else {
1932                 switch (n->nlmsg_type) {
1933                 case RTM_NEWTCLASS:
1934                         err = -EEXIST;
1935                         if (n->nlmsg_flags & NLM_F_EXCL)
1936                                 goto out;
1937                         break;
1938                 case RTM_DELTCLASS:
1939                         err = tclass_del_notify(net, cops, skb, n, q, cl);
1940                         /* Unbind the class with flilters with 0 */
1941                         tc_bind_tclass(q, portid, clid, 0);
1942                         goto out;
1943                 case RTM_GETTCLASS:
1944                         err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1945                         goto out;
1946                 default:
1947                         err = -EINVAL;
1948                         goto out;
1949                 }
1950         }
1951
1952         if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1953                 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
1954                 return -EOPNOTSUPP;
1955         }
1956
1957         new_cl = cl;
1958         err = -EOPNOTSUPP;
1959         if (cops->change)
1960                 err = cops->change(q, clid, portid, tca, &new_cl, extack);
1961         if (err == 0) {
1962                 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
1963                 /* We just create a new class, need to do reverse binding. */
1964                 if (cl != new_cl)
1965                         tc_bind_tclass(q, portid, clid, new_cl);
1966         }
1967 out:
1968         return err;
1969 }
1970
1971 struct qdisc_dump_args {
1972         struct qdisc_walker     w;
1973         struct sk_buff          *skb;
1974         struct netlink_callback *cb;
1975 };
1976
1977 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1978                             struct qdisc_walker *arg)
1979 {
1980         struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1981
1982         return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
1983                               a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1984                               RTM_NEWTCLASS);
1985 }
1986
1987 static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1988                                 struct tcmsg *tcm, struct netlink_callback *cb,
1989                                 int *t_p, int s_t)
1990 {
1991         struct qdisc_dump_args arg;
1992
1993         if (tc_qdisc_dump_ignore(q, false) ||
1994             *t_p < s_t || !q->ops->cl_ops ||
1995             (tcm->tcm_parent &&
1996              TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1997                 (*t_p)++;
1998                 return 0;
1999         }
2000         if (*t_p > s_t)
2001                 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2002         arg.w.fn = qdisc_class_dump;
2003         arg.skb = skb;
2004         arg.cb = cb;
2005         arg.w.stop  = 0;
2006         arg.w.skip = cb->args[1];
2007         arg.w.count = 0;
2008         q->ops->cl_ops->walk(q, &arg.w);
2009         cb->args[1] = arg.w.count;
2010         if (arg.w.stop)
2011                 return -1;
2012         (*t_p)++;
2013         return 0;
2014 }
2015
2016 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2017                                struct tcmsg *tcm, struct netlink_callback *cb,
2018                                int *t_p, int s_t)
2019 {
2020         struct Qdisc *q;
2021         int b;
2022
2023         if (!root)
2024                 return 0;
2025
2026         if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2027                 return -1;
2028
2029         if (!qdisc_dev(root))
2030                 return 0;
2031
2032         if (tcm->tcm_parent) {
2033                 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
2034                 if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2035                         return -1;
2036                 return 0;
2037         }
2038         hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
2039                 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2040                         return -1;
2041         }
2042
2043         return 0;
2044 }
2045
2046 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2047 {
2048         struct tcmsg *tcm = nlmsg_data(cb->nlh);
2049         struct net *net = sock_net(skb->sk);
2050         struct netdev_queue *dev_queue;
2051         struct net_device *dev;
2052         int t, s_t;
2053
2054         if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2055                 return 0;
2056         dev = dev_get_by_index(net, tcm->tcm_ifindex);
2057         if (!dev)
2058                 return 0;
2059
2060         s_t = cb->args[0];
2061         t = 0;
2062
2063         if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
2064                 goto done;
2065
2066         dev_queue = dev_ingress_queue(dev);
2067         if (dev_queue &&
2068             tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2069                                 &t, s_t) < 0)
2070                 goto done;
2071
2072 done:
2073         cb->args[0] = t;
2074
2075         dev_put(dev);
2076         return skb->len;
2077 }
2078
2079 #ifdef CONFIG_PROC_FS
2080 static int psched_show(struct seq_file *seq, void *v)
2081 {
2082         seq_printf(seq, "%08x %08x %08x %08x\n",
2083                    (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
2084                    1000000,
2085                    (u32)NSEC_PER_SEC / hrtimer_resolution);
2086
2087         return 0;
2088 }
2089
2090 static int psched_open(struct inode *inode, struct file *file)
2091 {
2092         return single_open(file, psched_show, NULL);
2093 }
2094
2095 static const struct file_operations psched_fops = {
2096         .open = psched_open,
2097         .read  = seq_read,
2098         .llseek = seq_lseek,
2099         .release = single_release,
2100 };
2101
2102 static int __net_init psched_net_init(struct net *net)
2103 {
2104         struct proc_dir_entry *e;
2105
2106         e = proc_create("psched", 0, net->proc_net, &psched_fops);
2107         if (e == NULL)
2108                 return -ENOMEM;
2109
2110         return 0;
2111 }
2112
2113 static void __net_exit psched_net_exit(struct net *net)
2114 {
2115         remove_proc_entry("psched", net->proc_net);
2116 }
2117 #else
2118 static int __net_init psched_net_init(struct net *net)
2119 {
2120         return 0;
2121 }
2122
2123 static void __net_exit psched_net_exit(struct net *net)
2124 {
2125 }
2126 #endif
2127
2128 static struct pernet_operations psched_net_ops = {
2129         .init = psched_net_init,
2130         .exit = psched_net_exit,
2131 };
2132
2133 static int __init pktsched_init(void)
2134 {
2135         int err;
2136
2137         err = register_pernet_subsys(&psched_net_ops);
2138         if (err) {
2139                 pr_err("pktsched_init: "
2140                        "cannot initialize per netns operations\n");
2141                 return err;
2142         }
2143
2144         register_qdisc(&pfifo_fast_ops);
2145         register_qdisc(&pfifo_qdisc_ops);
2146         register_qdisc(&bfifo_qdisc_ops);
2147         register_qdisc(&pfifo_head_drop_qdisc_ops);
2148         register_qdisc(&mq_qdisc_ops);
2149         register_qdisc(&noqueue_qdisc_ops);
2150
2151         rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2152         rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
2153         rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
2154                       0);
2155         rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2156         rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
2157         rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
2158                       0);
2159
2160         return 0;
2161 }
2162
2163 subsys_initcall(pktsched_init);