Merge tag 'mlx5-GRE-Offload' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed...
[linux-2.6-block.git] / net / sched / sch_api.c
CommitLineData
1da177e4
LT
1/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
1da177e4
LT
18#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
1da177e4 21#include <linux/string.h>
1da177e4 22#include <linux/errno.h>
1da177e4 23#include <linux/skbuff.h>
1da177e4
LT
24#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
4179477f 29#include <linux/hrtimer.h>
25bfcd5a 30#include <linux/lockdep.h>
5a0e3ad6 31#include <linux/slab.h>
59cc1f61 32#include <linux/hashtable.h>
1da177e4 33
457c4cbc 34#include <net/net_namespace.h>
b854272b 35#include <net/sock.h>
dc5fc579 36#include <net/netlink.h>
1da177e4
LT
37#include <net/pkt_sched.h>
38
1da177e4
LT
39/*
40
41 Short review.
42 -------------
43
44 This file consists of two interrelated parts:
45
46 1. queueing disciplines manager frontend.
47 2. traffic classes manager frontend.
48
49 Generally, queueing discipline ("qdisc") is a black box,
50 which is able to enqueue packets and to dequeue them (when
51 device is ready to send something) in order and at times
52 determined by algorithm hidden in it.
53
54 qdisc's are divided to two categories:
55 - "queues", which have no internal structure visible from outside.
56 - "schedulers", which split all the packets to "traffic classes",
57 using "packet classifiers" (look at cls_api.c)
58
59 In turn, classes may have child qdiscs (as rule, queues)
60 attached to them etc. etc. etc.
61
62 The goal of the routines in this file is to translate
63 information supplied by user in the form of handles
64 to more intelligible for kernel form, to make some sanity
65 checks and part of work, which is common to all qdiscs
66 and to provide rtnetlink notifications.
67
68 All real intelligent work is done inside qdisc modules.
69
70
71
72 Every discipline has two major routines: enqueue and dequeue.
73
74 ---dequeue
75
76 dequeue usually returns a skb to send. It is allowed to return NULL,
77 but it does not mean that queue is empty, it just means that
78 discipline does not want to send anything this time.
79 Queue is really empty if q->q.qlen == 0.
80 For complicated disciplines with multiple queues q->q is not
81 real packet queue, but however q->q.qlen must be valid.
82
83 ---enqueue
84
85 enqueue returns 0, if packet was enqueued successfully.
86 If packet (this one or another one) was dropped, it returns
87 not zero error code.
88 NET_XMIT_DROP - this packet dropped
89 Expected action: do not backoff, but wait until queue will clear.
90 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
91 Expected action: backoff or ignore
1da177e4
LT
92
93 Auxiliary routines:
94
99c0db26
JP
95 ---peek
96
97 like dequeue but without removing a packet from the queue
98
1da177e4
LT
99 ---reset
100
101 returns qdisc to initial state: purge all buffers, clear all
102 timers, counters (except for statistics) etc.
103
104 ---init
105
106 initializes newly created qdisc.
107
108 ---destroy
109
110 destroys resources allocated by init and during lifetime of qdisc.
111
112 ---change
113
114 changes qdisc parameters.
115 */
116
117/* Protects list of registered TC modules. It is pure SMP lock. */
118static DEFINE_RWLOCK(qdisc_mod_lock);
119
120
121/************************************************
122 * Queueing disciplines manipulation. *
123 ************************************************/
124
125
126/* The list of all installed queueing disciplines. */
127
128static struct Qdisc_ops *qdisc_base;
129
21eb2189 130/* Register/unregister queueing discipline */
1da177e4
LT
131
/* Register a queueing discipline with the scheduler core.
 *
 * Fails with -EEXIST if an ops with the same id is already registered,
 * or -EINVAL if the ops table is inconsistent (a ->peek without a
 * ->dequeue, or class ops missing mandatory callbacks).  Missing
 * enqueue/dequeue/peek hooks are filled in from noop_qdisc_ops.
 * Runs under qdisc_mod_lock (write side).
 */
int register_qdisc(struct Qdisc_ops *qops)
{
        struct Qdisc_ops *q, **qp;
        int rc = -EEXIST;

        write_lock(&qdisc_mod_lock);
        /* Walk to the list tail, bailing out on a duplicate id. */
        for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
                if (!strcmp(qops->id, q->id))
                        goto out;

        if (qops->enqueue == NULL)
                qops->enqueue = noop_qdisc_ops.enqueue;
        if (qops->peek == NULL) {
                /* A peek hook only makes sense together with dequeue. */
                if (qops->dequeue == NULL)
                        qops->peek = noop_qdisc_ops.peek;
                else
                        goto out_einval;
        }
        if (qops->dequeue == NULL)
                qops->dequeue = noop_qdisc_ops.dequeue;

        if (qops->cl_ops) {
                const struct Qdisc_class_ops *cops = qops->cl_ops;

                /* Classful qdiscs must provide find/walk/leaf ... */
                if (!(cops->find && cops->walk && cops->leaf))
                        goto out_einval;

                /* ... and filter support implies bind/unbind support. */
                if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
                        goto out_einval;
        }

        qops->next = NULL;
        *qp = qops;
        rc = 0;
out:
        write_unlock(&qdisc_mod_lock);
        return rc;

out_einval:
        rc = -EINVAL;
        goto out;
}
EXPORT_SYMBOL(register_qdisc);
1da177e4
LT
175
176int unregister_qdisc(struct Qdisc_ops *qops)
177{
178 struct Qdisc_ops *q, **qp;
179 int err = -ENOENT;
180
181 write_lock(&qdisc_mod_lock);
cc7ec456 182 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
1da177e4
LT
183 if (q == qops)
184 break;
185 if (q) {
186 *qp = q->next;
187 q->next = NULL;
188 err = 0;
189 }
190 write_unlock(&qdisc_mod_lock);
191 return err;
192}
62e3ba1b 193EXPORT_SYMBOL(unregister_qdisc);
1da177e4 194
/* Get default qdisc if not otherwise specified.
 * Copies the id of the current default ops into @name (at most @len
 * bytes, NUL-terminated) under the module lock.
 */
void qdisc_get_default(char *name, size_t len)
{
        read_lock(&qdisc_mod_lock);
        strlcpy(name, default_qdisc_ops->id, len);
        read_unlock(&qdisc_mod_lock);
}
202
203static struct Qdisc_ops *qdisc_lookup_default(const char *name)
204{
205 struct Qdisc_ops *q = NULL;
206
207 for (q = qdisc_base; q; q = q->next) {
208 if (!strcmp(name, q->id)) {
209 if (!try_module_get(q->owner))
210 q = NULL;
211 break;
212 }
213 }
214
215 return q;
216}
217
/* Set new default qdisc to use.
 * Requires CAP_NET_ADMIN.  If @name is not yet registered, the module
 * lock is dropped so that "sch_<name>" can be modprobe'd, then the
 * lookup is retried.  On success the old default's module reference is
 * released.  Returns 0 or -ENOENT/-EPERM.
 */
int qdisc_set_default(const char *name)
{
        const struct Qdisc_ops *ops;

        if (!capable(CAP_NET_ADMIN))
                return -EPERM;

        write_lock(&qdisc_mod_lock);
        ops = qdisc_lookup_default(name);
        if (!ops) {
                /* Not found, drop lock and try to load module */
                write_unlock(&qdisc_mod_lock);
                request_module("sch_%s", name);
                write_lock(&qdisc_mod_lock);

                ops = qdisc_lookup_default(name);
        }

        if (ops) {
                /* Set new default */
                module_put(default_qdisc_ops->owner);
                default_qdisc_ops = ops;
        }
        write_unlock(&qdisc_mod_lock);

        return ops ? 0 : -ENOENT;
}
246
#ifdef CONFIG_NET_SCH_DEFAULT
/* Set default value from kernel config (runs once at late init). */
static int __init sch_default_qdisc(void)
{
        return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
}
late_initcall(sch_default_qdisc);
#endif
255
/* We know handle. Find qdisc among all qdisc's attached to device
 * (root qdisc, all its children, children of children etc.)
 * Note: caller either uses rtnl or rcu_read_lock()
 */

static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
{
        struct Qdisc *q;

        /* A root with no device (e.g. noop) can only match itself. */
        if (!qdisc_dev(root))
                return (root->handle == handle ? root : NULL);

        if (!(root->flags & TCQ_F_BUILTIN) &&
            root->handle == handle)
                return root;

        /* All non-root qdiscs of the device live in its per-device
         * hash table; walk the matching bucket under RCU.
         */
        hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
                if (q->handle == handle)
                        return q;
        }
        return NULL;
}
278
/* Insert @q into its device's qdisc hash table (keyed by handle).
 * Root and ingress qdiscs are reachable directly and are not hashed.
 * @invisible marks the qdisc as hidden from default dumps.
 */
void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
        if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
                ASSERT_RTNL();
                hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
                if (invisible)
                        q->flags |= TCQ_F_INVISIBLE;
        }
}
EXPORT_SYMBOL(qdisc_hash_add);
f6e0b239 289
/* Remove @q from its device's qdisc hash table (inverse of
 * qdisc_hash_add); root and ingress qdiscs were never hashed.
 */
void qdisc_hash_del(struct Qdisc *q)
{
        if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
                ASSERT_RTNL();
                hash_del_rcu(&q->hash);
        }
}
EXPORT_SYMBOL(qdisc_hash_del);
f6e0b239 298
/* Find a qdisc by handle on @dev: first under the egress root, then,
 * if present, under the ingress queue's sleeping qdisc.
 * Returns NULL when not found.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
        struct Qdisc *q;

        q = qdisc_match_from_root(dev->qdisc, handle);
        if (q)
                goto out;

        if (dev_ingress_queue(dev))
                q = qdisc_match_from_root(
                        dev_ingress_queue(dev)->qdisc_sleeping,
                        handle);
out:
        return q;
}
314
315static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
316{
317 unsigned long cl;
318 struct Qdisc *leaf;
20fea08b 319 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
1da177e4
LT
320
321 if (cops == NULL)
322 return NULL;
143976ce 323 cl = cops->find(p, classid);
1da177e4
LT
324
325 if (cl == 0)
326 return NULL;
327 leaf = cops->leaf(p, cl);
1da177e4
LT
328 return leaf;
329}
330
331/* Find queueing discipline by name */
332
1e90474c 333static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4
LT
334{
335 struct Qdisc_ops *q = NULL;
336
337 if (kind) {
338 read_lock(&qdisc_mod_lock);
339 for (q = qdisc_base; q; q = q->next) {
1e90474c 340 if (nla_strcmp(kind, q->id) == 0) {
1da177e4
LT
341 if (!try_module_get(q->owner))
342 q = NULL;
343 break;
344 }
345 }
346 read_unlock(&qdisc_mod_lock);
347 }
348 return q;
349}
350
/* The linklayer setting were not transferred from iproute2, in older
 * versions, and the rate tables lookup systems have been dropped in
 * the kernel. To keep backward compatible with older iproute2 tc
 * utils, we detect the linklayer setting by detecting if the rate
 * table were modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find low and high table entries for
 * mapping this cell. If these entries contain the same value, when
 * the rate tables have been modified for linklayer ATM.
 *
 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
 * and then roundup to the next cell, calc the table entry one below,
 * and compare.
 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
        int low       = roundup(r->mpu, 48);
        int high      = roundup(low+1, 48);
        int cell_low  = low >> r->cell_log;
        int cell_high = (high >> r->cell_log) - 1;

        /* rtab is too inaccurate at rates > 100Mbit/s */
        if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
                pr_debug("TC linklayer: Giving up ATM detection\n");
                return TC_LINKLAYER_ETHERNET;
        }

        /* Two adjacent cells holding the same cost implies the table
         * was padded to 48-byte ATM cells by userspace.
         */
        if ((cell_high > cell_low) && (cell_high < 256)
            && (rtab[cell_low] == rtab[cell_high])) {
                pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
                         cell_low, cell_high, rtab[cell_high]);
                return TC_LINKLAYER_ATM;
        }
        return TC_LINKLAYER_ETHERNET;
}
389
/* Global list of shared, refcounted rate tables (RTNL-protected). */
static struct qdisc_rate_table *qdisc_rtab_list;

/* Look up or create a rate table matching (@r, @tab).
 * An existing entry with identical ratespec and 1024-byte table data
 * is reused with its refcount bumped; otherwise a new entry is
 * allocated and linked at the list head.  Returns NULL on invalid
 * input or allocation failure.
 */
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
                                        struct nlattr *tab)
{
        struct qdisc_rate_table *rtab;

        if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
            nla_len(tab) != TC_RTAB_SIZE)
                return NULL;

        for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
                if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
                    !memcmp(&rtab->data, nla_data(tab), 1024)) {
                        rtab->refcnt++;
                        return rtab;
                }
        }

        rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
        if (rtab) {
                rtab->rate = *r;
                rtab->refcnt = 1;
                memcpy(rtab->data, nla_data(tab), 1024);
                /* Old userspace doesn't set linklayer; infer it from
                 * the table contents (see __detect_linklayer).
                 */
                if (r->linklayer == TC_LINKLAYER_UNAWARE)
                        r->linklayer = __detect_linklayer(r, rtab->data);
                rtab->next = qdisc_rtab_list;
                qdisc_rtab_list = rtab;
        }
        return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4
LT
422
/* Drop a reference on @tab; free and unlink it from qdisc_rtab_list
 * once the last reference is gone.  NULL is a no-op.
 */
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
        struct qdisc_rate_table *rtab, **rtabp;

        if (!tab || --tab->refcnt)
                return;

        for (rtabp = &qdisc_rtab_list;
             (rtab = *rtabp) != NULL;
             rtabp = &rtab->next) {
                if (rtab == tab) {
                        *rtabp = rtab->next;
                        kfree(rtab);
                        return;
                }
        }
}
EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4 441
/* List of shared, refcounted size tables (RTNL-protected). */
static LIST_HEAD(qdisc_stab_list);

static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
        [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
        [TCA_STAB_DATA] = { .type = NLA_BINARY },
};

/* Parse a TCA_STAB attribute and return a matching size table,
 * reusing an identical cached entry when possible.  The returned
 * table holds one reference; release with qdisc_put_stab().
 * Returns ERR_PTR on malformed attributes or allocation failure.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
        struct nlattr *tb[TCA_STAB_MAX + 1];
        struct qdisc_size_table *stab;
        struct tc_sizespec *s;
        unsigned int tsize = 0;
        u16 *tab = NULL;
        int err;

        err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, NULL);
        if (err < 0)
                return ERR_PTR(err);
        if (!tb[TCA_STAB_BASE])
                return ERR_PTR(-EINVAL);

        s = nla_data(tb[TCA_STAB_BASE]);

        if (s->tsize > 0) {
                if (!tb[TCA_STAB_DATA])
                        return ERR_PTR(-EINVAL);
                tab = nla_data(tb[TCA_STAB_DATA]);
                tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
        }

        /* Declared size must match the data actually supplied. */
        if (tsize != s->tsize || (!tab && tsize > 0))
                return ERR_PTR(-EINVAL);

        list_for_each_entry(stab, &qdisc_stab_list, list) {
                if (memcmp(&stab->szopts, s, sizeof(*s)))
                        continue;
                if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
                        continue;
                /* Identical table already cached: share it. */
                stab->refcnt++;
                return stab;
        }

        stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
        if (!stab)
                return ERR_PTR(-ENOMEM);

        stab->refcnt = 1;
        stab->szopts = *s;
        if (tsize > 0)
                memcpy(stab->data, tab, tsize * sizeof(u16));

        list_add_tail(&stab->list, &qdisc_stab_list);

        return stab;
}
498
/* RCU callback: free a size table after the grace period. */
static void stab_kfree_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct qdisc_size_table, rcu));
}
503
/* Drop a reference on @tab; on the last reference unlink it and free
 * via RCU (readers may still be walking it).  NULL is a no-op.
 */
void qdisc_put_stab(struct qdisc_size_table *tab)
{
        if (!tab)
                return;

        if (--tab->refcnt == 0) {
                list_del(&tab->list);
                call_rcu_bh(&tab->rcu, stab_kfree_rcu);
        }
}
EXPORT_SYMBOL(qdisc_put_stab);
515
/* Emit @stab as a nested TCA_STAB attribute (base spec only, not the
 * table data).  Returns skb->len on success, -1 on put failure.
 */
static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
        struct nlattr *nest;

        nest = nla_nest_start(skb, TCA_STAB);
        if (nest == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
                goto nla_put_failure;
        nla_nest_end(skb, nest);

        return skb->len;

nla_put_failure:
        return -1;
}
532
/* Recompute the accounted packet length for @skb from the size table:
 * add per-packet overhead, map through the table slot (extrapolating
 * linearly past the table end), then scale by size_log.  The result is
 * clamped to >= 1 and stored in qdisc_skb_cb(skb)->pkt_len.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb,
                               const struct qdisc_size_table *stab)
{
        int pkt_len, slot;

        pkt_len = skb->len + stab->szopts.overhead;
        if (unlikely(!stab->szopts.tsize))
                goto out;

        slot = pkt_len + stab->szopts.cell_align;
        if (unlikely(slot < 0))
                slot = 0;

        slot >>= stab->szopts.cell_log;
        if (likely(slot < stab->szopts.tsize))
                pkt_len = stab->data[slot];
        else
                /* Past the table: extrapolate from the last entry. */
                pkt_len = stab->data[stab->szopts.tsize - 1] *
                                (slot / stab->szopts.tsize) +
                                stab->data[slot % stab->szopts.tsize];

        pkt_len <<= stab->szopts.size_log;
out:
        if (unlikely(pkt_len < 1))
                pkt_len = 1;
        qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
175f9c1b 561
/* Warn (once per qdisc, gated by TCQ_F_WARN_NONWC) that a supposedly
 * work-conserving qdisc returned no packet while non-empty.
 */
void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
{
        if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
                pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
                        txt, qdisc->ops->id, qdisc->handle >> 16);
                qdisc->flags |= TCQ_F_WARN_NONWC;
        }
}
EXPORT_SYMBOL(qdisc_warn_nonwc);
571
/* hrtimer callback: reschedule the watchdog's root qdisc so its
 * dequeue path runs again.  Never rearms itself.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
        struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
                                                 timer);

        rcu_read_lock();
        __netif_schedule(qdisc_root(wd->qdisc));
        rcu_read_unlock();

        return HRTIMER_NORESTART;
}
583
/* Initialize a qdisc watchdog: CPU-pinned absolute monotonic hrtimer
 * firing qdisc_watchdog() on behalf of @qdisc.
 */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
        hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
        wd->timer.function = qdisc_watchdog;
        wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
591
/* (Re)arm the watchdog to fire at absolute time @expires (ns).
 * No-op when the root qdisc is deactivated or the timer is already
 * programmed for the same expiry.
 */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
        if (test_bit(__QDISC_STATE_DEACTIVATED,
                     &qdisc_root_sleeping(wd->qdisc)->state))
                return;

        /* Avoid hrtimer reprogramming churn for identical deadlines. */
        if (wd->last_expires == expires)
                return;

        wd->last_expires = expires;
        hrtimer_start(&wd->timer,
                      ns_to_ktime(expires),
                      HRTIMER_MODE_ABS_PINNED);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
4179477f
PM
607
/* Cancel a pending watchdog timer, waiting for a running callback. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
        hrtimer_cancel(&wd->timer);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
1da177e4 613
a94f779f 614static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
6fe1c7a5 615{
6fe1c7a5 616 struct hlist_head *h;
9695fe6f 617 unsigned int i;
6fe1c7a5 618
9695fe6f 619 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
6fe1c7a5
PM
620
621 if (h != NULL) {
622 for (i = 0; i < n; i++)
623 INIT_HLIST_HEAD(&h[i]);
624 }
625 return h;
626}
627
/* Double the class hash table when its load factor exceeds 0.75.
 * The new table is allocated outside the qdisc tree lock; entries are
 * rehashed and the table swapped under sch_tree_lock, then the old
 * buckets are freed.  Silently keeps the old table on allocation
 * failure.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
        struct Qdisc_class_common *cl;
        struct hlist_node *next;
        struct hlist_head *nhash, *ohash;
        unsigned int nsize, nmask, osize;
        unsigned int i, h;

        /* Rehash when load factor exceeds 0.75 */
        if (clhash->hashelems * 4 <= clhash->hashsize * 3)
                return;
        nsize = clhash->hashsize * 2;
        nmask = nsize - 1;
        nhash = qdisc_class_hash_alloc(nsize);
        if (nhash == NULL)
                return;

        ohash = clhash->hash;
        osize = clhash->hashsize;

        sch_tree_lock(sch);
        for (i = 0; i < osize; i++) {
                hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
                        h = qdisc_class_hash(cl->classid, nmask);
                        hlist_add_head(&cl->hnode, &nhash[h]);
                }
        }
        clhash->hash = nhash;
        clhash->hashsize = nsize;
        clhash->hashmask = nmask;
        sch_tree_unlock(sch);

        kvfree(ohash);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
663
664int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
665{
666 unsigned int size = 4;
667
668 clhash->hash = qdisc_class_hash_alloc(size);
669 if (clhash->hash == NULL)
670 return -ENOMEM;
671 clhash->hashsize = size;
672 clhash->hashmask = size - 1;
673 clhash->hashelems = 0;
674 return 0;
675}
676EXPORT_SYMBOL(qdisc_class_hash_init);
677
/* Free the bucket array; entries must already have been removed. */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
        kvfree(clhash->hash);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
683
684void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
685 struct Qdisc_class_common *cl)
686{
687 unsigned int h;
688
689 INIT_HLIST_NODE(&cl->hnode);
690 h = qdisc_class_hash(cl->classid, clhash->hashmask);
691 hlist_add_head(&cl->hnode, &clhash->hash[h]);
692 clhash->hashelems++;
693}
694EXPORT_SYMBOL(qdisc_class_hash_insert);
695
/* Unlink class @cl from @clhash and keep the element count in sync. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
                             struct Qdisc_class_common *cl)
{
        hlist_del(&cl->hnode);
        clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
703
/* Allocate an unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
        int i = 0x8000;
        /* Shared cursor across devices; uniqueness is checked per
         * device via qdisc_lookup().
         */
        static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

        do {
                autohandle += TC_H_MAKE(0x10000U, 0);
                if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
                        autohandle = TC_H_MAKE(0x80000000U, 0);
                if (!qdisc_lookup(dev, autohandle))
                        return autohandle;
                cond_resched();
        } while (--i > 0);

        /* Entire kernel handle space in use on this device. */
        return 0;
}
723
/* Propagate a drop of @n packets / @len bytes from @sch up through all
 * ancestor qdiscs, fixing their qlen/backlog/drop counters.  When a
 * child becomes empty, its parent class is told via ->qlen_notify so
 * it can go passive.  Walk stops at the root, at ingress, or at
 * TCQ_F_NOPARENT qdiscs.
 */
void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
                               unsigned int len)
{
        const struct Qdisc_class_ops *cops;
        unsigned long cl;
        u32 parentid;
        bool notify;
        int drops;

        if (n == 0 && len == 0)
                return;
        drops = max_t(int, n, 0);
        rcu_read_lock();
        while ((parentid = sch->parent)) {
                if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
                        break;

                if (sch->flags & TCQ_F_NOPARENT)
                        break;
                /* Notify parent qdisc only if child qdisc becomes empty.
                 *
                 * If child was empty even before update then backlog
                 * counter is screwed and we skip notification because
                 * parent class is already passive.
                 */
                notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
                /* TODO: perform the search on a per txq basis */
                sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
                if (sch == NULL) {
                        WARN_ON_ONCE(parentid != TC_H_ROOT);
                        break;
                }
                cops = sch->ops->cl_ops;
                if (notify && cops->qlen_notify) {
                        cl = cops->find(sch, parentid);
                        cops->qlen_notify(sch, cl);
                }
                sch->q.qlen -= n;
                sch->qstats.backlog -= len;
                __qdisc_qstats_drop(sch, drops);
        }
        rcu_read_unlock();
}
EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
1da177e4 768
/* Fill a netlink message describing qdisc @q: tcmsg header, TCA_KIND,
 * qdisc-specific options (->dump), optional size table, and the
 * statistics blocks.  Returns skb->len on success, -1 when the skb
 * runs out of room (the partial message is trimmed off).
 */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
                         u32 portid, u32 seq, u16 flags, int event)
{
        struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
        struct gnet_stats_queue __percpu *cpu_qstats = NULL;
        struct tcmsg *tcm;
        struct nlmsghdr *nlh;
        unsigned char *b = skb_tail_pointer(skb);   /* rollback point */
        struct gnet_dump d;
        struct qdisc_size_table *stab;
        __u32 qlen;

        cond_resched();
        nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
        if (!nlh)
                goto out_nlmsg_trim;
        tcm = nlmsg_data(nlh);
        tcm->tcm_family = AF_UNSPEC;
        tcm->tcm__pad1 = 0;
        tcm->tcm__pad2 = 0;
        tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
        tcm->tcm_parent = clid;
        tcm->tcm_handle = q->handle;
        tcm->tcm_info = refcount_read(&q->refcnt);
        if (nla_put_string(skb, TCA_KIND, q->ops->id))
                goto nla_put_failure;
        if (q->ops->dump && q->ops->dump(q, skb) < 0)
                goto nla_put_failure;
        qlen = q->q.qlen;

        stab = rtnl_dereference(q->stab);
        if (stab && qdisc_dump_stab(skb, stab) < 0)
                goto nla_put_failure;

        if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
                                         NULL, &d, TCA_PAD) < 0)
                goto nla_put_failure;

        if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
                goto nla_put_failure;

        if (qdisc_is_percpu_stats(q)) {
                cpu_bstats = q->cpu_bstats;
                cpu_qstats = q->cpu_qstats;
        }

        if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
                                  &d, cpu_bstats, &q->bstats) < 0 ||
            gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
            gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
                goto nla_put_failure;

        if (gnet_stats_finish_copy(&d) < 0)
                goto nla_put_failure;

        nlh->nlmsg_len = skb_tail_pointer(skb) - b;
        return skb->len;

out_nlmsg_trim:
nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}
832
833static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
834{
835 if (q->flags & TCQ_F_BUILTIN)
836 return true;
837 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
838 return true;
839
840 return false;
841}
842
/* Send an rtnetlink notification for a qdisc change: RTM_DELQDISC for
 * @old and/or RTM_NEWQDISC for @new (with NLM_F_REPLACE when both are
 * present).  Qdiscs filtered by tc_qdisc_dump_ignore() are skipped;
 * an entirely empty message is not sent and returns -EINVAL.
 */
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
                        struct nlmsghdr *n, u32 clid,
                        struct Qdisc *old, struct Qdisc *new)
{
        struct sk_buff *skb;
        u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

        skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
        if (!skb)
                return -ENOBUFS;

        if (old && !tc_qdisc_dump_ignore(old, false)) {
                if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
                                  0, RTM_DELQDISC) < 0)
                        goto err_out;
        }
        if (new && !tc_qdisc_dump_ignore(new, false)) {
                if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
                                  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
                        goto err_out;
        }

        if (skb->len)
                return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
                                      n->nlmsg_flags & NLM_F_ECHO);

err_out:
        kfree_skb(skb);
        return -EINVAL;
}
873
/* Send a change notification (if anything changed) and then destroy
 * the replaced qdisc @old.
 */
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
                               struct nlmsghdr *n, u32 clid,
                               struct Qdisc *old, struct Qdisc *new)
{
        if (new || old)
                qdisc_notify(net, skb, n, clid, old, new);

        if (old)
                qdisc_destroy(old);
}
884
885/* Graft qdisc "new" to class "classid" of qdisc "parent" or
886 * to device "dev".
887 *
888 * When appropriate send a netlink notification using 'skb'
889 * and "n".
890 *
891 * On success, destroy old qdisc.
1da177e4
LT
892 */
893
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
                       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
                       struct Qdisc *new, struct Qdisc *old)
{
        struct Qdisc *q = old;
        struct net *net = dev_net(dev);
        int err = 0;

        if (parent == NULL) {
                /* Grafting at device root (or ingress): replace the
                 * qdisc on every relevant tx queue.
                 */
                unsigned int i, num_q, ingress;

                ingress = 0;
                num_q = dev->num_tx_queues;
                if ((q && q->flags & TCQ_F_INGRESS) ||
                    (new && new->flags & TCQ_F_INGRESS)) {
                        num_q = 1;
                        ingress = 1;
                        if (!dev_ingress_queue(dev))
                                return -ENOENT;
                }

                if (dev->flags & IFF_UP)
                        dev_deactivate(dev);

                /* Multi-queue-aware qdiscs attach themselves below. */
                if (new && new->ops->attach)
                        goto skip;

                for (i = 0; i < num_q; i++) {
                        struct netdev_queue *dev_queue = dev_ingress_queue(dev);

                        if (!ingress)
                                dev_queue = netdev_get_tx_queue(dev, i);

                        old = dev_graft_qdisc(dev_queue, new);
                        /* One reference per additional tx queue. */
                        if (new && i > 0)
                                refcount_inc(&new->refcnt);

                        if (!ingress)
                                qdisc_destroy(old);
                }

skip:
                if (!ingress) {
                        notify_and_destroy(net, skb, n, classid,
                                           dev->qdisc, new);
                        if (new && !new->ops->attach)
                                refcount_inc(&new->refcnt);
                        dev->qdisc = new ? : &noop_qdisc;

                        if (new && new->ops->attach)
                                new->ops->attach(new);
                } else {
                        notify_and_destroy(net, skb, n, classid, old, new);
                }

                if (dev->flags & IFF_UP)
                        dev_activate(dev);
        } else {
                /* Grafting into a class of an existing qdisc. */
                const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

                err = -EOPNOTSUPP;
                if (cops && cops->graft) {
                        unsigned long cl = cops->find(parent, classid);

                        if (cl)
                                err = cops->graft(parent, cl, new, &old);
                        else
                                err = -ENOENT;
                }
                if (!err)
                        notify_and_destroy(net, skb, n, classid, old, new);
        }
        return err;
}
968
/* lockdep annotation is needed for ingress; egress gets it only for name */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
972
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *qdisc_create(struct net_device *dev,
                                  struct netdev_queue *dev_queue,
                                  struct Qdisc *p, u32 parent, u32 handle,
                                  struct nlattr **tca, int *errp)
{
        int err;
        struct nlattr *kind = tca[TCA_KIND];
        struct Qdisc *sch;
        struct Qdisc_ops *ops;
        struct qdisc_size_table *stab;

        ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
        if (ops == NULL && kind != NULL) {
                char name[IFNAMSIZ];
                if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
                        /* We dropped the RTNL semaphore in order to
                         * perform the module load. So, even if we
                         * succeeded in loading the module we have to
                         * tell the caller to replay the request. We
                         * indicate this using -EAGAIN.
                         * We replay the request because the device may
                         * go away in the mean time.
                         */
                        rtnl_unlock();
                        request_module("sch_%s", name);
                        rtnl_lock();
                        ops = qdisc_lookup_ops(kind);
                        if (ops != NULL) {
                                /* We will try again qdisc_lookup_ops,
                                 * so don't keep a reference.
                                 */
                                module_put(ops->owner);
                                err = -EAGAIN;
                                goto err_out;
                        }
                }
        }
#endif

        err = -ENOENT;
        if (ops == NULL)
                goto err_out;

        sch = qdisc_alloc(dev_queue, ops);
        if (IS_ERR(sch)) {
                err = PTR_ERR(sch);
                goto err_out2;
        }

        sch->parent = parent;

        if (handle == TC_H_INGRESS) {
                sch->flags |= TCQ_F_INGRESS;
                handle = TC_H_MAKE(TC_H_INGRESS, 0);
                lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
        } else {
                if (handle == 0) {
                        handle = qdisc_alloc_handle(dev);
                        err = -ENOMEM;
                        if (handle == 0)
                                goto err_out3;
                }
                lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
                if (!netif_is_multiqueue(dev))
                        sch->flags |= TCQ_F_ONETXQUEUE;
        }

        sch->handle = handle;

        /* This exist to keep backward compatible with a userspace
         * loophole, what allowed userspace to get IFF_NO_QUEUE
         * facility on older kernels by setting tx_queue_len=0 (prior
         * to qdisc init), and then forgot to reinit tx_queue_len
         * before again attaching a qdisc.
         */
        if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
                dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
                netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
        }

        if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
                if (qdisc_is_percpu_stats(sch)) {
                        sch->cpu_bstats =
                                netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
                        if (!sch->cpu_bstats)
                                goto err_out4;

                        sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
                        if (!sch->cpu_qstats)
                                goto err_out4;
                }

                if (tca[TCA_STAB]) {
                        stab = qdisc_get_stab(tca[TCA_STAB]);
                        if (IS_ERR(stab)) {
                                err = PTR_ERR(stab);
                                goto err_out4;
                        }
                        rcu_assign_pointer(sch->stab, stab);
                }
                if (tca[TCA_RATE]) {
                        seqcount_t *running;

                        err = -EOPNOTSUPP;
                        if (sch->flags & TCQ_F_MQROOT)
                                goto err_out4;

                        /* Pick the seqcount that actually guards this
                         * qdisc's stats updates.
                         */
                        if ((sch->parent != TC_H_ROOT) &&
                            !(sch->flags & TCQ_F_INGRESS) &&
                            (!p || !(p->flags & TCQ_F_MQROOT)))
                                running = qdisc_root_sleeping_running(sch);
                        else
                                running = &sch->running;

                        err = gen_new_estimator(&sch->bstats,
                                                sch->cpu_bstats,
                                                &sch->rate_est,
                                                NULL,
                                                running,
                                                tca[TCA_RATE]);
                        if (err)
                                goto err_out4;
                }

                qdisc_hash_add(sch, false);

                return sch;
        }
        /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
        if (ops->destroy)
                ops->destroy(sch);
err_out3:
        dev_put(dev);
        kfree((char *) sch - sch->padded);
err_out2:
        module_put(ops->owner);
err_out:
        *errp = err;
        return NULL;

err_out4:
        free_percpu(sch->cpu_bstats);
        free_percpu(sch->cpu_qstats);
        /*
         * Any broken qdiscs that would require a ops->reset() here?
         * The qdisc was never in action so it shouldn't be necessary.
         */
        qdisc_put_stab(rtnl_dereference(sch->stab));
        if (ops->destroy)
                ops->destroy(sch);
        goto err_out3;
}
1132
/* Apply a change request to an existing qdisc.
 *
 * Order matters: the qdisc's own ->change() runs first and may fail the
 * whole request; the size table is then swapped in under RCU; finally the
 * rate estimator is replaced best-effort (see comment below).
 *
 * Returns 0 on success or a negative errno from ->change()/stab lookup.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		/* A qdisc without ->change() cannot be reconfigured in place. */
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new size table (or NULL) and drop the old one.
	 * rtnl_dereference() is valid here: qdisc_change runs under RTNL.
	 */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats,
				      sch->cpu_bstats,
				      &sch->rate_est,
				      NULL,
				      qdisc_root_sleeping_running(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1171
/* State threaded through the class walker during loop detection:
 * @p     - the qdisc being grafted; finding it as a descendant means a loop.
 * @depth - current nesting level of the recursive walk.
 */
struct check_loop_arg {
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl,
			 struct qdisc_walker *w);
1181static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1182{
1183 struct check_loop_arg arg;
1184
1185 if (q->ops->cl_ops == NULL)
1186 return 0;
1187
1188 arg.w.stop = arg.w.skip = arg.w.count = 0;
1189 arg.w.fn = check_loop_fn;
1190 arg.depth = depth;
1191 arg.p = p;
1192 q->ops->cl_ops->walk(q, &arg.w);
1193 return arg.w.stop ? -ELOOP : 0;
1194}
1195
1196static int
1197check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1198{
1199 struct Qdisc *leaf;
20fea08b 1200 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4
LT
1201 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1202
1203 leaf = cops->leaf(q, cl);
1204 if (leaf) {
1205 if (leaf == arg->p || arg->depth > 7)
1206 return -ELOOP;
1207 return check_loop(leaf, arg->p, arg->depth + 1);
1208 }
1209 return 0;
1210}
1211
1212/*
1213 * Delete/get qdisc.
1214 */
1215
/* Handle RTM_DELQDISC and RTM_GETQDISC netlink requests.
 *
 * Resolves the target qdisc either via its parent (tcm_parent/clid) or
 * directly via tcm_handle, then either grafts NULL in its place (delete)
 * or echoes it back to the requester (get).
 *
 * Runs under RTNL (uses __dev_get_by_index without refcounting).
 * Returns 0 on success or a negative errno.
 */
static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Deleting requires CAP_NET_ADMIN in the owning user namespace;
	 * plain GET does not.
	 */
	if ((n->nlmsg_type != RTM_GETQDISC) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				/* Normal case: find parent, then its leaf. */
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		/* If a handle was also given, it must agree with the parent. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		/* handle == 0 marks the builtin default qdisc; not deletable. */
		if (q->handle == 0)
			return -ENOENT;
		/* Grafting NULL in place of q deletes it. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1281
1282/*
cc7ec456 1283 * Create/change qdisc.
1da177e4
LT
1284 */
1285
/* Handle RTM_NEWQDISC: create, replace, or change a qdisc.
 *
 * Decides between in-place ->change() and create+graft based on the
 * parent/handle in the tcmsg and the NLM_F_{CREATE,REPLACE,EXCL} flags.
 * If qdisc_create() fails with -EAGAIN (module was just loaded), the
 * whole request is replayed from scratch.
 *
 * Runs under RTNL. Returns 0 on success or a negative errno.
 */
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
			   struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Moving an existing qdisc under p: make sure
				 * that would not create a cycle.
				 */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				/* Hold a reference for the graft below. */
				refcount_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent pick the tx queue; otherwise
		 * inherit the parent's queue or fall back to queue 0.
		 */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		/* -EAGAIN: qdisc module was just request_module()ed;
		 * retry the whole operation.
		 */
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1440
/* Dump @root and (if @recur) every qdisc hashed on its device into @skb.
 *
 * @q_idx_p/@s_q_idx implement netlink dump resume: entries with an index
 * below the saved position are counted but skipped. Returns 0 when done,
 * -1 when the skb filled up (dump will be resumed from *q_idx_p).
 */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx, bool recur,
			      bool dump_invisible)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	/* First emit the root itself (unless we are resuming past it). */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

	/* If dumping singletons, there is no qdisc_dev(root) and the singleton
	 * itself has already been dumped.
	 *
	 * If we've already dumped the top-level (ingress) qdisc above and the global
	 * qdisc hashtable, we don't want to hit it again
	 */
	if (!qdisc_dev(root) || !recur)
		goto out;

	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI,
				  RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
1494
/* Netlink dump callback for RTM_GETQDISC: walk every netdev in the
 * namespace and dump its root qdisc tree plus its ingress qdisc.
 *
 * Resume state lives in cb->args[0] (device index) and cb->args[1]
 * (qdisc index within the device). Returns skb->len (more data may
 * follow) or a negative errno from attribute parsing.
 */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	const struct nlmsghdr *nlh = cb->nlh;
	struct tcmsg *tcm = nlmsg_data(nlh);
	struct nlattr *tca[TCA_MAX + 1];
	int err;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	idx = 0;
	ASSERT_RTNL();

	/* Parsed only for TCA_DUMP_INVISIBLE; extack not propagated here. */
	err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL);
	if (err < 0)
		return err;

	for_each_netdev(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Past the resume device: restart qdisc indexing at 0. */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
				       true, tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx, false,
				       tca[TCA_DUMP_INVISIBLE]) < 0)
			goto done;

cont:
		idx++;
	}

done:
	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1546
1547
1548
1549/************************************************
1550 * Traffic classes manipulation. *
1551 ************************************************/
1552
/* Fill one traffic-class netlink message (tcmsg + TCA_KIND + class dump +
 * statistics) into @skb.
 *
 * On any failure the partially written message is trimmed back to the
 * tail recorded in @b, so the skb is left exactly as it was on entry.
 * Returns skb->len on success, -1 on failure.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	/* Called in long dump loops; give the scheduler a chance. */
	cond_resched();
	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* The class's own dump fills tcm_handle/tcm_info as appropriate. */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 NULL, &d, TCA_PAD) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1598
1599static int tclass_notify(struct net *net, struct sk_buff *oskb,
1600 struct nlmsghdr *n, struct Qdisc *q,
1601 unsigned long cl, int event)
1602{
1603 struct sk_buff *skb;
1604 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1605
1606 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1607 if (!skb)
1608 return -ENOBUFS;
1609
1610 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1611 kfree_skb(skb);
1612 return -EINVAL;
1613 }
1614
1615 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1616 n->nlmsg_flags & NLM_F_ECHO);
1617}
1da177e4 1618
14546ba1
WC
1619static int tclass_del_notify(struct net *net,
1620 const struct Qdisc_class_ops *cops,
1621 struct sk_buff *oskb, struct nlmsghdr *n,
1622 struct Qdisc *q, unsigned long cl)
1623{
1624 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1625 struct sk_buff *skb;
1626 int err = 0;
1627
1628 if (!cops->delete)
1629 return -EOPNOTSUPP;
1630
1631 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1632 if (!skb)
1633 return -ENOBUFS;
1634
1635 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1636 RTM_DELTCLASS) < 0) {
1637 kfree_skb(skb);
1638 return -EINVAL;
1639 }
1640
1641 err = cops->delete(q, cl);
1642 if (err) {
1643 kfree_skb(skb);
1644 return err;
1645 }
1646
1647 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1648 n->nlmsg_flags & NLM_F_ECHO);
1649}
1650
/* Handle RTM_{NEW,DEL,GET}TCLASS: create, change, delete, or fetch a
 * traffic class of a classful qdisc.
 *
 * Completes the partially specified parent/handle IDs (see table below),
 * locates the owning qdisc and the class, then dispatches on message type.
 * Runs under RTNL. Returns 0 on success or a negative errno.
 */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;
	u32 clid;
	u32 qid;
	int err;

	/* Only GET is allowed without CAP_NET_ADMIN. */
	if ((n->nlmsg_type != RTM_GETTCLASS) &&
	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT - class is root, which has no parent.
	   parent == X:0 - parent is root class.
	   parent == X:Y - parent is a node in hierarchy.
	   parent == 0:Y - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0 - generate handle from kernel pool.
	   handle == 0:Y - class is X:Y, where X:0 is qdisc.
	   handle == X:Y - clear.
	   handle == X:0 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->find(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		/* A nonexistent class may only be created (NEW + CREATE). */
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = tclass_del_notify(net, cops, skb, n, q, cl);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or change: delegate to the qdisc's class ops. */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	return err;
}
1776
/* Walker state for dumping all classes of a qdisc: carries the dump skb
 * and the netlink callback through the per-class walk.
 */
struct qdisc_dump_args {
	struct qdisc_walker	w;
	struct sk_buff		*skb;
	struct netlink_callback	*cb;
};
1782
5a7a5555
JHS
1783static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1784 struct qdisc_walker *arg)
1da177e4
LT
1785{
1786 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1787
15e47304 1788 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
5a7a5555
JHS
1789 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1790 RTM_NEWTCLASS);
1da177e4
LT
1791}
1792
/* Dump the classes of a single qdisc @q during a class dump.
 *
 * @t_p counts qdiscs seen so far; qdiscs before the resume point @s_t are
 * only counted. cb->args[1] carries the per-qdisc class resume position.
 * Returns 0 to continue, -1 when the dump skb filled up.
 */
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
				struct tcmsg *tcm, struct netlink_callback *cb,
				int *t_p, int s_t)
{
	struct qdisc_dump_args arg;

	/* Skip invisible/ignored qdiscs, classless qdiscs, qdiscs before
	 * the resume point, and those not matching a requested parent.
	 */
	if (tc_qdisc_dump_ignore(q, false) ||
	    *t_p < s_t || !q->ops->cl_ops ||
	    (tcm->tcm_parent &&
	     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
		(*t_p)++;
		return 0;
	}
	/* Moving past the resume qdisc: clear the deeper resume state. */
	if (*t_p > s_t)
		memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
	arg.w.fn = qdisc_class_dump;
	arg.skb = skb;
	arg.cb = cb;
	arg.w.stop = 0;
	arg.w.skip = cb->args[1];
	arg.w.count = 0;
	q->ops->cl_ops->walk(q, &arg.w);
	cb->args[1] = arg.w.count;
	if (arg.w.stop)
		return -1;
	(*t_p)++;
	return 0;
}
1821
/* Dump classes of @root and, if a parent filter was given, only of the
 * matching qdisc; otherwise of every qdisc hashed on the device.
 * Returns 0 when done, -1 when the dump skb filled up.
 */
static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
			       struct tcmsg *tcm, struct netlink_callback *cb,
			       int *t_p, int s_t)
{
	struct Qdisc *q;
	int b;

	if (!root)
		return 0;

	if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
		return -1;

	/* Singleton root (no device): nothing further to walk. */
	if (!qdisc_dev(root))
		return 0;

	if (tcm->tcm_parent) {
		/* Parent filter given: dump only that qdisc's classes. */
		q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
		if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
		return 0;
	}
	hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
		if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
			return -1;
	}

	return 0;
}
1851
/* Netlink dump callback for RTM_GETTCLASS: dump all classes of the egress
 * root and the ingress qdisc of one device.
 *
 * Takes a device reference via dev_get_by_index() and releases it on all
 * paths through the common 'done' label. Resume position is cb->args[0].
 */
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct net *net = sock_net(skb->sk);
	struct netdev_queue *dev_queue;
	struct net_device *dev;
	int t, s_t;

	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
		return 0;
	dev = dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return 0;

	s_t = cb->args[0];
	t = 0;

	if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
		goto done;

	dev_queue = dev_ingress_queue(dev);
	if (dev_queue &&
	    tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
				&t, s_t) < 0)
		goto done;

done:
	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
1884
1da177e4
LT
1885#ifdef CONFIG_PROC_FS
/* Render /proc/net/psched: four 32-bit hex words describing the packet
 * scheduler's clock (tick<->ns conversion factors and the hrtimer
 * resolution in Hz). The third word is a fixed historical constant.
 */
static int psched_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC / hrtimer_resolution);

	return 0;
}
1895
/* open() hook for /proc/net/psched: single-shot seq_file. */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}
1900
/* File operations for /proc/net/psched (read-only seq_file). */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
7316ae88
TG
1908
1909static int __net_init psched_net_init(struct net *net)
1910{
1911 struct proc_dir_entry *e;
1912
d4beaa66 1913 e = proc_create("psched", 0, net->proc_net, &psched_fops);
7316ae88
TG
1914 if (e == NULL)
1915 return -ENOMEM;
1916
1917 return 0;
1918}
1919
/* Per-netns teardown: remove /proc/net/psched for this namespace. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
#else
/* Without CONFIG_PROC_FS there is nothing to expose; keep no-op hooks so
 * the pernet registration below works unconditionally.
 */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}

static void __net_exit psched_net_exit(struct net *net)
{
}
#endif
1934
/* Per-network-namespace lifecycle hooks for the psched proc entry. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
1939
/* Subsystem init: register pernet proc hooks, the built-in qdiscs, and
 * the rtnetlink handlers for qdisc and class messages.
 *
 * Only the pernet registration can fail; qdisc/rtnl registrations here
 * are unchecked (built-in, cannot clash at boot).
 */
static int __init pktsched_init(void)
{
	int err;

	err = register_pernet_subsys(&psched_net_ops);
	if (err) {
		pr_err("pktsched_init: "
		       "cannot initialize per netns operations\n");
		return err;
	}

	register_qdisc(&pfifo_fast_ops);
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	register_qdisc(&pfifo_head_drop_qdisc_ops);
	register_qdisc(&mq_qdisc_ops);
	register_qdisc(&noqueue_qdisc_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
		      0);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
		      0);

	return 0;
}

subsys_initcall(pktsched_init);