/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>
#include <linux/lockdep.h>
#include <linux/slab.h>
#include <linux/hashtable.h>

#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing discipline manager frontend.
   2. The traffic class manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box that can
   enqueue packets and dequeue them (when the device is ready to send
   something) in an order and at times determined by the algorithm
   hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes"
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a form
   more intelligible to the kernel, to perform sanity checks and the
   part of the work common to all qdiscs, and to provide rtnetlink
   notifications.

   All the real intelligent work is done inside the qdisc modules.


   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns an skb to send. It is allowed to return
   NULL, but that does not mean the queue is empty; it just means the
   discipline does not want to send anything this time.
   The queue is really empty if q->q.qlen == 0.
   For complicated disciplines with multiple queues q->q is not the
   real packet queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns a
   non-zero error code:
   NET_XMIT_DROP - this packet was dropped.
     Expected action: do not back off, but wait until the queue clears.
   NET_XMIT_CN   - this packet was probably enqueued, but another one was dropped.
     Expected action: back off or ignore.

   Auxiliary routines:

   ---peek

   like dequeue, but without removing the packet from the queue.

   ---reset

   returns the qdisc to its initial state: purges all buffers and
   clears all timers and counters (except for statistics).

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys the resources allocated by init and during the lifetime of
   the qdisc.

   ---change

   changes qdisc parameters.
 */
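/*
 * To make the hooks above concrete, here is a minimal, purely
 * illustrative sketch of a FIFO-style qdisc, loosely modelled on
 * sch_fifo.c (the "example_fifo" names are placeholders, not part of
 * the real scheduler API):
 */

static int example_fifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
				struct sk_buff **to_free)
{
	/* Accept packets up to the device tx_queue_len, then report a drop. */
	if (likely(sch->q.qlen < qdisc_dev(sch)->tx_queue_len))
		return qdisc_enqueue_tail(skb, sch);	/* NET_XMIT_SUCCESS */
	return qdisc_drop(skb, sch, to_free);		/* NET_XMIT_DROP */
}

static struct Qdisc_ops example_fifo_qdisc_ops __read_mostly = {
	.id		= "example_fifo",
	.priv_size	= 0,
	.enqueue	= example_fifo_enqueue,
	.dequeue	= qdisc_dequeue_head,	/* may return NULL: nothing to send */
	.peek		= qdisc_peek_head,	/* like dequeue, without removing */
	.reset		= qdisc_reset_queue,	/* purge buffers, back to initial state */
	.owner		= THIS_MODULE,
};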

/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;

/* Register/unregister queueing discipline */
1da177e4
LT
132
133int register_qdisc(struct Qdisc_ops *qops)
134{
135 struct Qdisc_ops *q, **qp;
136 int rc = -EEXIST;
137
138 write_lock(&qdisc_mod_lock);
139 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
140 if (!strcmp(qops->id, q->id))
141 goto out;
142
143 if (qops->enqueue == NULL)
144 qops->enqueue = noop_qdisc_ops.enqueue;
99c0db26 145 if (qops->peek == NULL) {
68fd26b5 146 if (qops->dequeue == NULL)
99c0db26 147 qops->peek = noop_qdisc_ops.peek;
68fd26b5
JP
148 else
149 goto out_einval;
99c0db26 150 }
1da177e4
LT
151 if (qops->dequeue == NULL)
152 qops->dequeue = noop_qdisc_ops.dequeue;
153
68fd26b5
JP
154 if (qops->cl_ops) {
155 const struct Qdisc_class_ops *cops = qops->cl_ops;
156
143976ce 157 if (!(cops->find && cops->walk && cops->leaf))
68fd26b5
JP
158 goto out_einval;
159
6529eaba 160 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
68fd26b5
JP
161 goto out_einval;
162 }
163
1da177e4
LT
164 qops->next = NULL;
165 *qp = qops;
166 rc = 0;
167out:
168 write_unlock(&qdisc_mod_lock);
169 return rc;
68fd26b5
JP
170
171out_einval:
172 rc = -EINVAL;
173 goto out;
1da177e4 174}
62e3ba1b 175EXPORT_SYMBOL(register_qdisc);
1da177e4
LT
176
177int unregister_qdisc(struct Qdisc_ops *qops)
178{
179 struct Qdisc_ops *q, **qp;
180 int err = -ENOENT;
181
182 write_lock(&qdisc_mod_lock);
cc7ec456 183 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
1da177e4
LT
184 if (q == qops)
185 break;
186 if (q) {
187 *qp = q->next;
188 q->next = NULL;
189 err = 0;
190 }
191 write_unlock(&qdisc_mod_lock);
192 return err;
193}
62e3ba1b 194EXPORT_SYMBOL(unregister_qdisc);
1da177e4 195
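/*
 * Typical module boilerplate around the register/unregister pair above
 * (hedged sketch; example_fifo_qdisc_ops is the illustrative ops table
 * sketched earlier, not a real in-tree qdisc):
 */
static int __init example_fifo_module_init(void)
{
	return register_qdisc(&example_fifo_qdisc_ops);
}

static void __exit example_fifo_module_exit(void)
{
	unregister_qdisc(&example_fifo_qdisc_ops);
}

module_init(example_fifo_module_init);
module_exit(example_fifo_module_exit);
MODULE_LICENSE("GPL");
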
6da7c8fc 196/* Get default qdisc if not otherwise specified */
197void qdisc_get_default(char *name, size_t len)
198{
199 read_lock(&qdisc_mod_lock);
200 strlcpy(name, default_qdisc_ops->id, len);
201 read_unlock(&qdisc_mod_lock);
202}
203
204static struct Qdisc_ops *qdisc_lookup_default(const char *name)
205{
206 struct Qdisc_ops *q = NULL;
207
208 for (q = qdisc_base; q; q = q->next) {
209 if (!strcmp(name, q->id)) {
210 if (!try_module_get(q->owner))
211 q = NULL;
212 break;
213 }
214 }
215
216 return q;
217}
218
219/* Set new default qdisc to use */
220int qdisc_set_default(const char *name)
221{
222 const struct Qdisc_ops *ops;
223
224 if (!capable(CAP_NET_ADMIN))
225 return -EPERM;
226
227 write_lock(&qdisc_mod_lock);
228 ops = qdisc_lookup_default(name);
229 if (!ops) {
230 /* Not found, drop lock and try to load module */
231 write_unlock(&qdisc_mod_lock);
232 request_module("sch_%s", name);
233 write_lock(&qdisc_mod_lock);
234
235 ops = qdisc_lookup_default(name);
236 }
237
238 if (ops) {
239 /* Set new default */
240 module_put(default_qdisc_ops->owner);
241 default_qdisc_ops = ops;
242 }
243 write_unlock(&qdisc_mod_lock);
244
245 return ops ? 0 : -ENOENT;
246}
247
8ea3e439 248#ifdef CONFIG_NET_SCH_DEFAULT
249/* Set default value from kernel config */
250static int __init sch_default_qdisc(void)
251{
252 return qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
253}
254late_initcall(sch_default_qdisc);
255#endif
256
/* We know the handle. Find the qdisc among all qdiscs attached to the
 * device (the root qdisc, all its children, children of children, etc.).
 * Note: the caller holds either the rtnl lock or rcu_read_lock().
 */
261
6113b748 262static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
8123b421
DM
263{
264 struct Qdisc *q;
265
69012ae4
JK
266 if (!qdisc_dev(root))
267 return (root->handle == handle ? root : NULL);
268
8123b421
DM
269 if (!(root->flags & TCQ_F_BUILTIN) &&
270 root->handle == handle)
271 return root;
272
59cc1f61 273 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) {
8123b421
DM
274 if (q->handle == handle)
275 return q;
276 }
277 return NULL;
278}
279
49b49971 280void qdisc_hash_add(struct Qdisc *q, bool invisible)
f6e0b239 281{
37314363 282 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
4eaf3b84 283 ASSERT_RTNL();
59cc1f61 284 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
49b49971
JK
285 if (invisible)
286 q->flags |= TCQ_F_INVISIBLE;
37314363 287 }
f6e0b239 288}
59cc1f61 289EXPORT_SYMBOL(qdisc_hash_add);
f6e0b239 290
59cc1f61 291void qdisc_hash_del(struct Qdisc *q)
f6e0b239 292{
4eaf3b84
ED
293 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294 ASSERT_RTNL();
59cc1f61 295 hash_del_rcu(&q->hash);
4eaf3b84 296 }
f6e0b239 297}
59cc1f61 298EXPORT_SYMBOL(qdisc_hash_del);
f6e0b239 299
ead81cc5 300struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
1da177e4 301{
f6e0b239
JP
302 struct Qdisc *q;
303
50317fce
CW
304 if (!handle)
305 return NULL;
af356afa
PM
306 q = qdisc_match_from_root(dev->qdisc, handle);
307 if (q)
308 goto out;
f6e0b239 309
24824a09
ED
310 if (dev_ingress_queue(dev))
311 q = qdisc_match_from_root(
312 dev_ingress_queue(dev)->qdisc_sleeping,
313 handle);
f6486d40 314out:
f6e0b239 315 return q;
1da177e4
LT
316}
317
318static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
319{
320 unsigned long cl;
321 struct Qdisc *leaf;
20fea08b 322 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
1da177e4
LT
323
324 if (cops == NULL)
325 return NULL;
143976ce 326 cl = cops->find(p, classid);
1da177e4
LT
327
328 if (cl == 0)
329 return NULL;
330 leaf = cops->leaf(p, cl);
1da177e4
LT
331 return leaf;
332}
333
334/* Find queueing discipline by name */
335
1e90474c 336static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4
LT
337{
338 struct Qdisc_ops *q = NULL;
339
340 if (kind) {
341 read_lock(&qdisc_mod_lock);
342 for (q = qdisc_base; q; q = q->next) {
1e90474c 343 if (nla_strcmp(kind, q->id) == 0) {
1da177e4
LT
344 if (!try_module_get(q->owner))
345 q = NULL;
346 break;
347 }
348 }
349 read_unlock(&qdisc_mod_lock);
350 }
351 return q;
352}
353
/* The linklayer setting was not transferred from iproute2 in older
 * versions, and the rate table lookup system has been dropped from
 * the kernel. To stay backward compatible with older iproute2 tc
 * utils, we detect the linklayer setting by checking whether the rate
 * table was modified.
 *
 * For linklayer ATM table entries, the rate table will be aligned to
 * 48 bytes, thus some table entries will contain the same value. The
 * mpu (min packet unit) is also encoded into the old rate table, thus
 * starting from the mpu, we find the low and high table entries for
 * mapping this cell. If these entries contain the same value, then
 * the rate table has been modified for linklayer ATM.
 *
 * This is done by rounding mpu up to the nearest 48-byte cell/entry,
 * then rounding up to the next cell, taking the table entry one below
 * that, and comparing the two.
 */
371static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
372{
373 int low = roundup(r->mpu, 48);
374 int high = roundup(low+1, 48);
375 int cell_low = low >> r->cell_log;
376 int cell_high = (high >> r->cell_log) - 1;
377
378 /* rtab is too inaccurate at rates > 100Mbit/s */
379 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
380 pr_debug("TC linklayer: Giving up ATM detection\n");
381 return TC_LINKLAYER_ETHERNET;
382 }
383
384 if ((cell_high > cell_low) && (cell_high < 256)
385 && (rtab[cell_low] == rtab[cell_high])) {
386 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
387 cell_low, cell_high, rtab[cell_high]);
388 return TC_LINKLAYER_ATM;
389 }
390 return TC_LINKLAYER_ETHERNET;
391}
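/*
 * Worked example of the detection arithmetic above (illustrative
 * values, assuming mpu = 0 and cell_log = 3):
 *
 *	low  = roundup(0, 48) = 0	-> cell_low  = 0 >> 3        = 0
 *	high = roundup(1, 48) = 48	-> cell_high = (48 >> 3) - 1 = 5
 *
 * In an ATM-aligned table every packet size from 1 to 48 bytes costs
 * one 48-byte cell, so rtab[0] == rtab[5] and TC_LINKLAYER_ATM is
 * returned; an unmodified Ethernet table normally differs between
 * those slots and falls through to TC_LINKLAYER_ETHERNET.
 */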
392
1da177e4
LT
393static struct qdisc_rate_table *qdisc_rtab_list;
394
5a7a5555 395struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
e9bc3fa2
AA
396 struct nlattr *tab,
397 struct netlink_ext_ack *extack)
1da177e4
LT
398{
399 struct qdisc_rate_table *rtab;
400
40edeff6 401 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
e9bc3fa2
AA
402 nla_len(tab) != TC_RTAB_SIZE) {
403 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
40edeff6 404 return NULL;
e9bc3fa2 405 }
40edeff6 406
1da177e4 407 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
40edeff6
ED
408 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
409 !memcmp(&rtab->data, nla_data(tab), 1024)) {
1da177e4
LT
410 rtab->refcnt++;
411 return rtab;
412 }
413 }
414
1da177e4
LT
415 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
416 if (rtab) {
417 rtab->rate = *r;
418 rtab->refcnt = 1;
1e90474c 419 memcpy(rtab->data, nla_data(tab), 1024);
8a8e3d84
JDB
420 if (r->linklayer == TC_LINKLAYER_UNAWARE)
421 r->linklayer = __detect_linklayer(r, rtab->data);
1da177e4
LT
422 rtab->next = qdisc_rtab_list;
423 qdisc_rtab_list = rtab;
e9bc3fa2
AA
424 } else {
425 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
1da177e4
LT
426 }
427 return rtab;
428}
62e3ba1b 429EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4
LT
430
431void qdisc_put_rtab(struct qdisc_rate_table *tab)
432{
433 struct qdisc_rate_table *rtab, **rtabp;
434
435 if (!tab || --tab->refcnt)
436 return;
437
cc7ec456
ED
438 for (rtabp = &qdisc_rtab_list;
439 (rtab = *rtabp) != NULL;
440 rtabp = &rtab->next) {
1da177e4
LT
441 if (rtab == tab) {
442 *rtabp = rtab->next;
443 kfree(rtab);
444 return;
445 }
446 }
447}
62e3ba1b 448EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4 449
175f9c1b 450static LIST_HEAD(qdisc_stab_list);
175f9c1b
JK
451
452static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
453 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
454 [TCA_STAB_DATA] = { .type = NLA_BINARY },
455};
456
09215598
AA
457static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
458 struct netlink_ext_ack *extack)
175f9c1b
JK
459{
460 struct nlattr *tb[TCA_STAB_MAX + 1];
461 struct qdisc_size_table *stab;
462 struct tc_sizespec *s;
463 unsigned int tsize = 0;
464 u16 *tab = NULL;
465 int err;
466
09215598 467 err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy, extack);
175f9c1b
JK
468 if (err < 0)
469 return ERR_PTR(err);
09215598
AA
470 if (!tb[TCA_STAB_BASE]) {
471 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
175f9c1b 472 return ERR_PTR(-EINVAL);
09215598 473 }
175f9c1b
JK
474
475 s = nla_data(tb[TCA_STAB_BASE]);
476
477 if (s->tsize > 0) {
09215598
AA
478 if (!tb[TCA_STAB_DATA]) {
479 NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
175f9c1b 480 return ERR_PTR(-EINVAL);
09215598 481 }
175f9c1b
JK
482 tab = nla_data(tb[TCA_STAB_DATA]);
483 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
484 }
485
09215598
AA
486 if (tsize != s->tsize || (!tab && tsize > 0)) {
487 NL_SET_ERR_MSG(extack, "Invalid size of size table");
175f9c1b 488 return ERR_PTR(-EINVAL);
09215598 489 }
175f9c1b 490
175f9c1b
JK
491 list_for_each_entry(stab, &qdisc_stab_list, list) {
492 if (memcmp(&stab->szopts, s, sizeof(*s)))
493 continue;
494 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
495 continue;
496 stab->refcnt++;
175f9c1b
JK
497 return stab;
498 }
499
175f9c1b
JK
500 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
501 if (!stab)
502 return ERR_PTR(-ENOMEM);
503
504 stab->refcnt = 1;
505 stab->szopts = *s;
506 if (tsize > 0)
507 memcpy(stab->data, tab, tsize * sizeof(u16));
508
175f9c1b 509 list_add_tail(&stab->list, &qdisc_stab_list);
175f9c1b
JK
510
511 return stab;
512}
513
a2da570d
ED
514static void stab_kfree_rcu(struct rcu_head *head)
515{
516 kfree(container_of(head, struct qdisc_size_table, rcu));
517}
518
175f9c1b
JK
519void qdisc_put_stab(struct qdisc_size_table *tab)
520{
521 if (!tab)
522 return;
523
175f9c1b
JK
524 if (--tab->refcnt == 0) {
525 list_del(&tab->list);
a2da570d 526 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
175f9c1b 527 }
175f9c1b
JK
528}
529EXPORT_SYMBOL(qdisc_put_stab);
530
531static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
532{
533 struct nlattr *nest;
534
535 nest = nla_nest_start(skb, TCA_STAB);
3aa4614d
PM
536 if (nest == NULL)
537 goto nla_put_failure;
1b34ec43
DM
538 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
539 goto nla_put_failure;
175f9c1b
JK
540 nla_nest_end(skb, nest);
541
542 return skb->len;
543
544nla_put_failure:
545 return -1;
546}
547
5a7a5555
JHS
548void __qdisc_calculate_pkt_len(struct sk_buff *skb,
549 const struct qdisc_size_table *stab)
175f9c1b
JK
550{
551 int pkt_len, slot;
552
553 pkt_len = skb->len + stab->szopts.overhead;
554 if (unlikely(!stab->szopts.tsize))
555 goto out;
556
557 slot = pkt_len + stab->szopts.cell_align;
558 if (unlikely(slot < 0))
559 slot = 0;
560
561 slot >>= stab->szopts.cell_log;
562 if (likely(slot < stab->szopts.tsize))
563 pkt_len = stab->data[slot];
564 else
565 pkt_len = stab->data[stab->szopts.tsize - 1] *
566 (slot / stab->szopts.tsize) +
567 stab->data[slot % stab->szopts.tsize];
568
569 pkt_len <<= stab->szopts.size_log;
570out:
571 if (unlikely(pkt_len < 1))
572 pkt_len = 1;
573 qdisc_skb_cb(skb)->pkt_len = pkt_len;
574}
a2da570d 575EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
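/*
 * Worked example for the size-table lookup above (illustrative numbers,
 * not from a real table): assume overhead = 0, cell_align = 0,
 * cell_log = 6, size_log = 0, tsize = 512 and data[i] = 64 * (i + 1),
 * i.e. the table rounds every packet up to a multiple of 64 bytes.
 * For an skb of 100 bytes:
 *
 *	pkt_len = 100 + 0		= 100
 *	slot    = (100 + 0) >> 6	= 1
 *	pkt_len = data[1]		= 128
 *	pkt_len <<= 0			-> qdisc_skb_cb(skb)->pkt_len = 128
 */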
175f9c1b 576
6e765a00 577void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
b00355db
JP
578{
579 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
cc7ec456
ED
580 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
581 txt, qdisc->ops->id, qdisc->handle >> 16);
b00355db
JP
582 qdisc->flags |= TCQ_F_WARN_NONWC;
583 }
584}
585EXPORT_SYMBOL(qdisc_warn_nonwc);
586
4179477f
PM
587static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
588{
589 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
2fbd3da3 590 timer);
4179477f 591
1e203c1a 592 rcu_read_lock();
8608db03 593 __netif_schedule(qdisc_root(wd->qdisc));
1e203c1a 594 rcu_read_unlock();
1936502d 595
4179477f
PM
596 return HRTIMER_NORESTART;
597}
598
599void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
600{
4a8e320c 601 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
2fbd3da3 602 wd->timer.function = qdisc_watchdog;
4179477f
PM
603 wd->qdisc = qdisc;
604}
605EXPORT_SYMBOL(qdisc_watchdog_init);
606
45f50bed 607void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
4179477f 608{
2540e051
JP
609 if (test_bit(__QDISC_STATE_DEACTIVATED,
610 &qdisc_root_sleeping(wd->qdisc)->state))
611 return;
612
a9efad8b
ED
613 if (wd->last_expires == expires)
614 return;
615
616 wd->last_expires = expires;
46baac38 617 hrtimer_start(&wd->timer,
34c5d292 618 ns_to_ktime(expires),
4a8e320c 619 HRTIMER_MODE_ABS_PINNED);
4179477f 620}
34c5d292 621EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
4179477f
PM
622
623void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
624{
2fbd3da3 625 hrtimer_cancel(&wd->timer);
4179477f
PM
626}
627EXPORT_SYMBOL(qdisc_watchdog_cancel);
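
/*
 * Sketch of how a shaping qdisc typically drives this watchdog
 * (illustrative names, loosely following the pattern used by
 * sch_tbf/sch_fq; qdisc_watchdog_init() is called from ->init() and
 * qdisc_watchdog_cancel() from ->destroy()):
 */
struct example_shaper_data {
	struct qdisc_watchdog	watchdog;
	u64			next_tx_ns;	/* earliest allowed transmit time */
};

static struct sk_buff *example_shaper_dequeue(struct Qdisc *sch)
{
	struct example_shaper_data *q = qdisc_priv(sch);
	u64 now = ktime_get_ns();

	if (now < q->next_tx_ns) {
		/* Too early: arm the watchdog so the qdisc gets rescheduled. */
		qdisc_watchdog_schedule_ns(&q->watchdog, q->next_tx_ns);
		return NULL;
	}
	return qdisc_dequeue_head(sch);
}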
1da177e4 628
a94f779f 629static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
6fe1c7a5 630{
6fe1c7a5 631 struct hlist_head *h;
9695fe6f 632 unsigned int i;
6fe1c7a5 633
9695fe6f 634 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
6fe1c7a5
PM
635
636 if (h != NULL) {
637 for (i = 0; i < n; i++)
638 INIT_HLIST_HEAD(&h[i]);
639 }
640 return h;
641}
642
6fe1c7a5
PM
643void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
644{
645 struct Qdisc_class_common *cl;
b67bfe0d 646 struct hlist_node *next;
6fe1c7a5
PM
647 struct hlist_head *nhash, *ohash;
648 unsigned int nsize, nmask, osize;
649 unsigned int i, h;
650
651 /* Rehash when load factor exceeds 0.75 */
652 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
653 return;
654 nsize = clhash->hashsize * 2;
655 nmask = nsize - 1;
656 nhash = qdisc_class_hash_alloc(nsize);
657 if (nhash == NULL)
658 return;
659
660 ohash = clhash->hash;
661 osize = clhash->hashsize;
662
663 sch_tree_lock(sch);
664 for (i = 0; i < osize; i++) {
b67bfe0d 665 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
6fe1c7a5
PM
666 h = qdisc_class_hash(cl->classid, nmask);
667 hlist_add_head(&cl->hnode, &nhash[h]);
668 }
669 }
670 clhash->hash = nhash;
671 clhash->hashsize = nsize;
672 clhash->hashmask = nmask;
673 sch_tree_unlock(sch);
674
9695fe6f 675 kvfree(ohash);
6fe1c7a5
PM
676}
677EXPORT_SYMBOL(qdisc_class_hash_grow);
678
679int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
680{
681 unsigned int size = 4;
682
683 clhash->hash = qdisc_class_hash_alloc(size);
ac8ef4ab 684 if (!clhash->hash)
6fe1c7a5
PM
685 return -ENOMEM;
686 clhash->hashsize = size;
687 clhash->hashmask = size - 1;
688 clhash->hashelems = 0;
689 return 0;
690}
691EXPORT_SYMBOL(qdisc_class_hash_init);
692
693void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
694{
9695fe6f 695 kvfree(clhash->hash);
6fe1c7a5
PM
696}
697EXPORT_SYMBOL(qdisc_class_hash_destroy);
698
699void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
700 struct Qdisc_class_common *cl)
701{
702 unsigned int h;
703
704 INIT_HLIST_NODE(&cl->hnode);
705 h = qdisc_class_hash(cl->classid, clhash->hashmask);
706 hlist_add_head(&cl->hnode, &clhash->hash[h]);
707 clhash->hashelems++;
708}
709EXPORT_SYMBOL(qdisc_class_hash_insert);
710
711void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
712 struct Qdisc_class_common *cl)
713{
714 hlist_del(&cl->hnode);
715 clhash->hashelems--;
716}
717EXPORT_SYMBOL(qdisc_class_hash_remove);
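
/*
 * Sketch of the usual classful-qdisc pattern built on the helpers above
 * (illustrative "example_class" type; the struct Qdisc_class_common
 * member is placed first so a lookup result can be mapped back with
 * container_of(), as real classful qdiscs such as htb and hfsc do):
 */
struct example_class {
	struct Qdisc_class_common	common;	/* classid + hash linkage */
	/* per-class scheduling state would follow here */
};

static struct example_class *example_class_lookup(struct Qdisc_class_hash *clhash,
						  u32 classid)
{
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(clhash, classid);
	if (!clc)
		return NULL;
	return container_of(clc, struct example_class, common);
}

/* A new class gets its classid stored in ->common.classid, is added with
 * qdisc_class_hash_insert(), and the table is resized as needed with
 * qdisc_class_hash_grow().
 */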
718
/* Allocate a unique handle from the space managed by the kernel.
 * Possible range is [8000-FFFF]:0000 (0x8000 values).
 */
1da177e4
LT
722static u32 qdisc_alloc_handle(struct net_device *dev)
723{
fa0f5aa7 724 int i = 0x8000;
1da177e4
LT
725 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
726
727 do {
728 autohandle += TC_H_MAKE(0x10000U, 0);
729 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
730 autohandle = TC_H_MAKE(0x80000000U, 0);
fa0f5aa7
ED
731 if (!qdisc_lookup(dev, autohandle))
732 return autohandle;
733 cond_resched();
734 } while (--i > 0);
1da177e4 735
fa0f5aa7 736 return 0;
1da177e4
LT
737}
738
2ccccf5f
WC
739void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
740 unsigned int len)
43effa1e 741{
20fea08b 742 const struct Qdisc_class_ops *cops;
43effa1e
PM
743 unsigned long cl;
744 u32 parentid;
95946658 745 bool notify;
2c8c8e6f 746 int drops;
43effa1e 747
2ccccf5f 748 if (n == 0 && len == 0)
43effa1e 749 return;
2c8c8e6f 750 drops = max_t(int, n, 0);
4eaf3b84 751 rcu_read_lock();
43effa1e 752 while ((parentid = sch->parent)) {
066a3b5b 753 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
4eaf3b84 754 break;
066a3b5b 755
4eaf3b84
ED
756 if (sch->flags & TCQ_F_NOPARENT)
757 break;
		/* Notify the parent qdisc only if the child qdisc becomes empty.
		 *
		 * If the child was empty even before the update, the backlog
		 * counter is inconsistent and we skip the notification because
		 * the parent class is already passive.
		 */
764 notify = !sch->q.qlen && !WARN_ON_ONCE(!n);
4eaf3b84 765 /* TODO: perform the search on a per txq basis */
5ce2d488 766 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
ffc8fefa 767 if (sch == NULL) {
4eaf3b84
ED
768 WARN_ON_ONCE(parentid != TC_H_ROOT);
769 break;
ffc8fefa 770 }
43effa1e 771 cops = sch->ops->cl_ops;
95946658 772 if (notify && cops->qlen_notify) {
143976ce 773 cl = cops->find(sch, parentid);
43effa1e 774 cops->qlen_notify(sch, cl);
43effa1e
PM
775 }
776 sch->q.qlen -= n;
2ccccf5f 777 sch->qstats.backlog -= len;
25331d6c 778 __qdisc_qstats_drop(sch, drops);
43effa1e 779 }
4eaf3b84 780 rcu_read_unlock();
43effa1e 781}
2ccccf5f 782EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
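
/*
 * Typical caller pattern (hedged sketch): a qdisc that just dropped
 * "dropped" packets totalling "dropped_len" bytes outside its normal
 * dequeue path (e.g. while shrinking a limit in ->change()) reports the
 * change upwards so ancestor qlen/backlog counters stay consistent:
 *
 *	qdisc_tree_reduce_backlog(sch, dropped, dropped_len);
 */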
1da177e4 783
27d7f07c
WC
784static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
785 u32 portid, u32 seq, u16 flags, int event)
786{
787 struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
788 struct gnet_stats_queue __percpu *cpu_qstats = NULL;
789 struct tcmsg *tcm;
790 struct nlmsghdr *nlh;
791 unsigned char *b = skb_tail_pointer(skb);
792 struct gnet_dump d;
793 struct qdisc_size_table *stab;
794 __u32 qlen;
795
796 cond_resched();
797 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
798 if (!nlh)
799 goto out_nlmsg_trim;
800 tcm = nlmsg_data(nlh);
801 tcm->tcm_family = AF_UNSPEC;
802 tcm->tcm__pad1 = 0;
803 tcm->tcm__pad2 = 0;
804 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
805 tcm->tcm_parent = clid;
806 tcm->tcm_handle = q->handle;
807 tcm->tcm_info = refcount_read(&q->refcnt);
808 if (nla_put_string(skb, TCA_KIND, q->ops->id))
809 goto nla_put_failure;
7a4fa291
YM
810 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
811 goto nla_put_failure;
27d7f07c
WC
812 if (q->ops->dump && q->ops->dump(q, skb) < 0)
813 goto nla_put_failure;
7e66016f
JF
814
815 qlen = qdisc_qlen_sum(q);
27d7f07c
WC
816
817 stab = rtnl_dereference(q->stab);
818 if (stab && qdisc_dump_stab(skb, stab) < 0)
819 goto nla_put_failure;
820
821 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
822 NULL, &d, TCA_PAD) < 0)
823 goto nla_put_failure;
824
825 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
826 goto nla_put_failure;
827
828 if (qdisc_is_percpu_stats(q)) {
829 cpu_bstats = q->cpu_bstats;
830 cpu_qstats = q->cpu_qstats;
831 }
832
833 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
834 &d, cpu_bstats, &q->bstats) < 0 ||
835 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
836 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
837 goto nla_put_failure;
838
839 if (gnet_stats_finish_copy(&d) < 0)
840 goto nla_put_failure;
841
842 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
843 return skb->len;
844
845out_nlmsg_trim:
846nla_put_failure:
847 nlmsg_trim(skb, b);
848 return -1;
849}
850
851static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
852{
853 if (q->flags & TCQ_F_BUILTIN)
854 return true;
855 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
856 return true;
857
858 return false;
859}
860
861static int qdisc_notify(struct net *net, struct sk_buff *oskb,
862 struct nlmsghdr *n, u32 clid,
863 struct Qdisc *old, struct Qdisc *new)
864{
865 struct sk_buff *skb;
866 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
867
868 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
869 if (!skb)
870 return -ENOBUFS;
871
872 if (old && !tc_qdisc_dump_ignore(old, false)) {
873 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
874 0, RTM_DELQDISC) < 0)
875 goto err_out;
876 }
877 if (new && !tc_qdisc_dump_ignore(new, false)) {
878 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
879 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
880 goto err_out;
881 }
882
883 if (skb->len)
884 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
885 n->nlmsg_flags & NLM_F_ECHO);
886
887err_out:
888 kfree_skb(skb);
889 return -EINVAL;
890}
891
7316ae88
TG
892static void notify_and_destroy(struct net *net, struct sk_buff *skb,
893 struct nlmsghdr *n, u32 clid,
99194cff
DM
894 struct Qdisc *old, struct Qdisc *new)
895{
896 if (new || old)
7316ae88 897 qdisc_notify(net, skb, n, clid, old, new);
1da177e4 898
4d8863a2 899 if (old)
99194cff 900 qdisc_destroy(old);
99194cff
DM
901}
902
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
 * to device "dev".
 *
 * When appropriate, send a netlink notification using 'skb'
 * and "n".
 *
 * On success, destroy the old qdisc.
 */
911
912static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
99194cff 913 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
09215598
AA
914 struct Qdisc *new, struct Qdisc *old,
915 struct netlink_ext_ack *extack)
1da177e4 916{
99194cff 917 struct Qdisc *q = old;
7316ae88 918 struct net *net = dev_net(dev);
1da177e4 919 int err = 0;
1da177e4 920
10297b99 921 if (parent == NULL) {
99194cff
DM
922 unsigned int i, num_q, ingress;
923
924 ingress = 0;
925 num_q = dev->num_tx_queues;
8d50b53d
DM
926 if ((q && q->flags & TCQ_F_INGRESS) ||
927 (new && new->flags & TCQ_F_INGRESS)) {
99194cff
DM
928 num_q = 1;
929 ingress = 1;
09215598
AA
930 if (!dev_ingress_queue(dev)) {
931 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
24824a09 932 return -ENOENT;
09215598 933 }
99194cff
DM
934 }
935
936 if (dev->flags & IFF_UP)
937 dev_deactivate(dev);
938
86e363dc
WC
939 if (new && new->ops->attach)
940 goto skip;
6ec1c69a 941
99194cff 942 for (i = 0; i < num_q; i++) {
24824a09 943 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
99194cff
DM
944
945 if (!ingress)
946 dev_queue = netdev_get_tx_queue(dev, i);
947
8d50b53d
DM
948 old = dev_graft_qdisc(dev_queue, new);
949 if (new && i > 0)
551143d8 950 qdisc_refcount_inc(new);
8d50b53d 951
036d6a67
JP
952 if (!ingress)
953 qdisc_destroy(old);
1da177e4 954 }
99194cff 955
86e363dc 956skip:
036d6a67 957 if (!ingress) {
7316ae88
TG
958 notify_and_destroy(net, skb, n, classid,
959 dev->qdisc, new);
036d6a67 960 if (new && !new->ops->attach)
551143d8 961 qdisc_refcount_inc(new);
036d6a67 962 dev->qdisc = new ? : &noop_qdisc;
86e363dc
WC
963
964 if (new && new->ops->attach)
965 new->ops->attach(new);
036d6a67 966 } else {
7316ae88 967 notify_and_destroy(net, skb, n, classid, old, new);
036d6a67 968 }
af356afa 969
99194cff
DM
970 if (dev->flags & IFF_UP)
971 dev_activate(dev);
1da177e4 972 } else {
20fea08b 973 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
1da177e4 974
c5ad119f
JF
975 /* Only support running class lockless if parent is lockless */
976 if (new && (new->flags & TCQ_F_NOLOCK) &&
977 parent && !(parent->flags & TCQ_F_NOLOCK))
978 new->flags &= ~TCQ_F_NOLOCK;
979
c9f1d038
PM
980 err = -EOPNOTSUPP;
981 if (cops && cops->graft) {
143976ce
WC
982 unsigned long cl = cops->find(parent, classid);
983
09215598 984 if (cl) {
653d6fd6
AA
985 err = cops->graft(parent, cl, new, &old,
986 extack);
09215598
AA
987 } else {
988 NL_SET_ERR_MSG(extack, "Specified class not found");
c9f1d038 989 err = -ENOENT;
09215598 990 }
1da177e4 991 }
99194cff 992 if (!err)
7316ae88 993 notify_and_destroy(net, skb, n, classid, old, new);
1da177e4
LT
994 }
995 return err;
996}
997
25bfcd5a
JP
998/* lockdep annotation is needed for ingress; egress gets it only for name */
999static struct lock_class_key qdisc_tx_lock;
1000static struct lock_class_key qdisc_rx_lock;
1001
1da177e4
LT
/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */
1007
5a7a5555
JHS
1008static struct Qdisc *qdisc_create(struct net_device *dev,
1009 struct netdev_queue *dev_queue,
1010 struct Qdisc *p, u32 parent, u32 handle,
09215598
AA
1011 struct nlattr **tca, int *errp,
1012 struct netlink_ext_ack *extack)
1da177e4
LT
1013{
1014 int err;
1e90474c 1015 struct nlattr *kind = tca[TCA_KIND];
1da177e4
LT
1016 struct Qdisc *sch;
1017 struct Qdisc_ops *ops;
175f9c1b 1018 struct qdisc_size_table *stab;
1da177e4
LT
1019
1020 ops = qdisc_lookup_ops(kind);
95a5afca 1021#ifdef CONFIG_MODULES
1da177e4
LT
1022 if (ops == NULL && kind != NULL) {
1023 char name[IFNAMSIZ];
1e90474c 1024 if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
1da177e4
LT
1025 /* We dropped the RTNL semaphore in order to
1026 * perform the module load. So, even if we
1027 * succeeded in loading the module we have to
1028 * tell the caller to replay the request. We
1029 * indicate this using -EAGAIN.
1030 * We replay the request because the device may
1031 * go away in the mean time.
1032 */
1033 rtnl_unlock();
1034 request_module("sch_%s", name);
1035 rtnl_lock();
1036 ops = qdisc_lookup_ops(kind);
1037 if (ops != NULL) {
1038 /* We will try again qdisc_lookup_ops,
1039 * so don't keep a reference.
1040 */
1041 module_put(ops->owner);
1042 err = -EAGAIN;
1043 goto err_out;
1044 }
1045 }
1046 }
1047#endif
1048
b9e2cc0f 1049 err = -ENOENT;
09215598
AA
1050 if (!ops) {
1051 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1da177e4 1052 goto err_out;
09215598 1053 }
1da177e4 1054
d0bd684d 1055 sch = qdisc_alloc(dev_queue, ops, extack);
3d54b82f
TG
1056 if (IS_ERR(sch)) {
1057 err = PTR_ERR(sch);
1da177e4 1058 goto err_out2;
3d54b82f 1059 }
1da177e4 1060
ffc8fefa
PM
1061 sch->parent = parent;
1062
3d54b82f 1063 if (handle == TC_H_INGRESS) {
1da177e4 1064 sch->flags |= TCQ_F_INGRESS;
3d54b82f 1065 handle = TC_H_MAKE(TC_H_INGRESS, 0);
25bfcd5a 1066 lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
fd44de7c 1067 } else {
fd44de7c
PM
1068 if (handle == 0) {
1069 handle = qdisc_alloc_handle(dev);
1070 err = -ENOMEM;
1071 if (handle == 0)
1072 goto err_out3;
1073 }
25bfcd5a 1074 lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
1abbe139 1075 if (!netif_is_multiqueue(dev))
225734de 1076 sch->flags |= TCQ_F_ONETXQUEUE;
1da177e4
LT
1077 }
1078
3d54b82f 1079 sch->handle = handle;
1da177e4 1080
	/* This exists to stay backward compatible with a userspace
	 * loophole that allowed userspace to get the IFF_NO_QUEUE
	 * facility on older kernels by setting tx_queue_len=0 (prior
	 * to qdisc init) and then forgetting to reinit tx_queue_len
	 * before attaching a qdisc again.
	 */
1087 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1088 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1089 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1090 }
1091
54160ef6 1092 if (ops->init) {
e63d7dfd 1093 err = ops->init(sch, tca[TCA_OPTIONS], extack);
54160ef6
AA
1094 if (err != 0)
1095 goto err_out5;
1096 }
22e0f8b9 1097
54160ef6
AA
1098 if (qdisc_is_percpu_stats(sch)) {
1099 sch->cpu_bstats =
1100 netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
1101 if (!sch->cpu_bstats)
1102 goto err_out4;
f6f9b93f 1103
54160ef6
AA
1104 sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
1105 if (!sch->cpu_qstats)
1106 goto err_out4;
1107 }
23bcf634 1108
54160ef6 1109 if (tca[TCA_STAB]) {
09215598 1110 stab = qdisc_get_stab(tca[TCA_STAB], extack);
54160ef6
AA
1111 if (IS_ERR(stab)) {
1112 err = PTR_ERR(stab);
1113 goto err_out4;
023e09a7 1114 }
54160ef6
AA
1115 rcu_assign_pointer(sch->stab, stab);
1116 }
1117 if (tca[TCA_RATE]) {
1118 seqcount_t *running;
f6e0b239 1119
54160ef6 1120 err = -EOPNOTSUPP;
09215598
AA
1121 if (sch->flags & TCQ_F_MQROOT) {
1122 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
54160ef6 1123 goto err_out4;
09215598 1124 }
1da177e4 1125
54160ef6
AA
1126 if (sch->parent != TC_H_ROOT &&
1127 !(sch->flags & TCQ_F_INGRESS) &&
1128 (!p || !(p->flags & TCQ_F_MQROOT)))
1129 running = qdisc_root_sleeping_running(sch);
1130 else
1131 running = &sch->running;
1132
1133 err = gen_new_estimator(&sch->bstats,
1134 sch->cpu_bstats,
1135 &sch->rate_est,
1136 NULL,
1137 running,
1138 tca[TCA_RATE]);
09215598
AA
1139 if (err) {
1140 NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
54160ef6 1141 goto err_out4;
09215598 1142 }
1da177e4 1143 }
54160ef6
AA
1144
1145 qdisc_hash_add(sch, false);
1146
1147 return sch;
1148
1149err_out5:
87b60cfa 1150 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
c1a4872e
GF
1151 if (ops->destroy)
1152 ops->destroy(sch);
1da177e4
LT
1153err_out3:
1154 dev_put(dev);
3d54b82f 1155 kfree((char *) sch - sch->padded);
1da177e4
LT
1156err_out2:
1157 module_put(ops->owner);
1158err_out:
1159 *errp = err;
1da177e4 1160 return NULL;
23bcf634
PM
1161
1162err_out4:
22e0f8b9 1163 free_percpu(sch->cpu_bstats);
b0ab6f92 1164 free_percpu(sch->cpu_qstats);
23bcf634
PM
1165 /*
1166 * Any broken qdiscs that would require a ops->reset() here?
1167 * The qdisc was never in action so it shouldn't be necessary.
1168 */
a2da570d 1169 qdisc_put_stab(rtnl_dereference(sch->stab));
23bcf634
PM
1170 if (ops->destroy)
1171 ops->destroy(sch);
1172 goto err_out3;
1da177e4
LT
1173}
1174
09215598
AA
1175static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1176 struct netlink_ext_ack *extack)
1da177e4 1177{
a2da570d 1178 struct qdisc_size_table *ostab, *stab = NULL;
175f9c1b 1179 int err = 0;
1da177e4 1180
175f9c1b 1181 if (tca[TCA_OPTIONS]) {
09215598
AA
1182 if (!sch->ops->change) {
1183 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1da177e4 1184 return -EINVAL;
09215598 1185 }
2030721c 1186 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1da177e4
LT
1187 if (err)
1188 return err;
1189 }
175f9c1b
JK
1190
1191 if (tca[TCA_STAB]) {
09215598 1192 stab = qdisc_get_stab(tca[TCA_STAB], extack);
175f9c1b
JK
1193 if (IS_ERR(stab))
1194 return PTR_ERR(stab);
1195 }
1196
a2da570d
ED
1197 ostab = rtnl_dereference(sch->stab);
1198 rcu_assign_pointer(sch->stab, stab);
1199 qdisc_put_stab(ostab);
175f9c1b 1200
23bcf634 1201 if (tca[TCA_RATE]) {
71bcb09a
SH
1202 /* NB: ignores errors from replace_estimator
1203 because change can't be undone. */
23bcf634
PM
1204 if (sch->flags & TCQ_F_MQROOT)
1205 goto out;
22e0f8b9
JF
1206 gen_replace_estimator(&sch->bstats,
1207 sch->cpu_bstats,
1208 &sch->rate_est,
edb09eb1
ED
1209 NULL,
1210 qdisc_root_sleeping_running(sch),
22e0f8b9 1211 tca[TCA_RATE]);
23bcf634
PM
1212 }
1213out:
1da177e4
LT
1214 return 0;
1215}
1216
cc7ec456
ED
1217struct check_loop_arg {
1218 struct qdisc_walker w;
1da177e4
LT
1219 struct Qdisc *p;
1220 int depth;
1221};
1222
5a7a5555
JHS
1223static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1224 struct qdisc_walker *w);
1da177e4
LT
1225
1226static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1227{
1228 struct check_loop_arg arg;
1229
1230 if (q->ops->cl_ops == NULL)
1231 return 0;
1232
1233 arg.w.stop = arg.w.skip = arg.w.count = 0;
1234 arg.w.fn = check_loop_fn;
1235 arg.depth = depth;
1236 arg.p = p;
1237 q->ops->cl_ops->walk(q, &arg.w);
1238 return arg.w.stop ? -ELOOP : 0;
1239}
1240
1241static int
1242check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1243{
1244 struct Qdisc *leaf;
20fea08b 1245 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4
LT
1246 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1247
1248 leaf = cops->leaf(q, cl);
1249 if (leaf) {
1250 if (leaf == arg->p || arg->depth > 7)
1251 return -ELOOP;
1252 return check_loop(leaf, arg->p, arg->depth + 1);
1253 }
1254 return 0;
1255}
1256
1257/*
1258 * Delete/get qdisc.
1259 */
1260
c21ef3e3
DA
1261static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1262 struct netlink_ext_ack *extack)
1da177e4 1263{
3b1e0a65 1264 struct net *net = sock_net(skb->sk);
02ef22ca 1265 struct tcmsg *tcm = nlmsg_data(n);
1e90474c 1266 struct nlattr *tca[TCA_MAX + 1];
1da177e4 1267 struct net_device *dev;
de179c8c 1268 u32 clid;
1da177e4
LT
1269 struct Qdisc *q = NULL;
1270 struct Qdisc *p = NULL;
1271 int err;
1272
4e8bbb81 1273 if ((n->nlmsg_type != RTM_GETQDISC) &&
5f013c9b 1274 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef8
EB
1275 return -EPERM;
1276
c21ef3e3 1277 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1e90474c
PM
1278 if (err < 0)
1279 return err;
1280
de179c8c
H
1281 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1282 if (!dev)
1283 return -ENODEV;
1284
1285 clid = tcm->tcm_parent;
1da177e4
LT
1286 if (clid) {
1287 if (clid != TC_H_ROOT) {
1288 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
cc7ec456 1289 p = qdisc_lookup(dev, TC_H_MAJ(clid));
09215598
AA
1290 if (!p) {
1291 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1da177e4 1292 return -ENOENT;
09215598 1293 }
1da177e4 1294 q = qdisc_leaf(p, clid);
cc7ec456
ED
1295 } else if (dev_ingress_queue(dev)) {
1296 q = dev_ingress_queue(dev)->qdisc_sleeping;
10297b99 1297 }
1da177e4 1298 } else {
af356afa 1299 q = dev->qdisc;
1da177e4 1300 }
09215598
AA
1301 if (!q) {
1302 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1da177e4 1303 return -ENOENT;
09215598 1304 }
1da177e4 1305
09215598
AA
1306 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1307 NL_SET_ERR_MSG(extack, "Invalid handle");
1da177e4 1308 return -EINVAL;
09215598 1309 }
1da177e4 1310 } else {
cc7ec456 1311 q = qdisc_lookup(dev, tcm->tcm_handle);
09215598
AA
1312 if (!q) {
1313 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1da177e4 1314 return -ENOENT;
09215598 1315 }
1da177e4
LT
1316 }
1317
09215598
AA
1318 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1319 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1da177e4 1320 return -EINVAL;
09215598 1321 }
1da177e4
LT
1322
1323 if (n->nlmsg_type == RTM_DELQDISC) {
09215598
AA
1324 if (!clid) {
1325 NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1da177e4 1326 return -EINVAL;
09215598
AA
1327 }
1328 if (q->handle == 0) {
1329 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1da177e4 1330 return -ENOENT;
09215598
AA
1331 }
1332 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
cc7ec456 1333 if (err != 0)
1da177e4 1334 return err;
1da177e4 1335 } else {
7316ae88 1336 qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4
LT
1337 }
1338 return 0;
1339}
1340
1341/*
cc7ec456 1342 * Create/change qdisc.
1da177e4
LT
1343 */
1344
c21ef3e3
DA
1345static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1346 struct netlink_ext_ack *extack)
1da177e4 1347{
3b1e0a65 1348 struct net *net = sock_net(skb->sk);
1da177e4 1349 struct tcmsg *tcm;
1e90474c 1350 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1351 struct net_device *dev;
1352 u32 clid;
1353 struct Qdisc *q, *p;
1354 int err;
1355
5f013c9b 1356 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef8
EB
1357 return -EPERM;
1358
1da177e4
LT
1359replay:
1360 /* Reinit, just in case something touches this. */
c21ef3e3 1361 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
de179c8c
H
1362 if (err < 0)
1363 return err;
1364
02ef22ca 1365 tcm = nlmsg_data(n);
1da177e4
LT
1366 clid = tcm->tcm_parent;
1367 q = p = NULL;
1368
cc7ec456
ED
1369 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1370 if (!dev)
1da177e4
LT
1371 return -ENODEV;
1372
1e90474c 1373
1da177e4
LT
1374 if (clid) {
1375 if (clid != TC_H_ROOT) {
1376 if (clid != TC_H_INGRESS) {
cc7ec456 1377 p = qdisc_lookup(dev, TC_H_MAJ(clid));
09215598
AA
1378 if (!p) {
1379 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1da177e4 1380 return -ENOENT;
09215598 1381 }
1da177e4 1382 q = qdisc_leaf(p, clid);
cc7ec456
ED
1383 } else if (dev_ingress_queue_create(dev)) {
1384 q = dev_ingress_queue(dev)->qdisc_sleeping;
1da177e4
LT
1385 }
1386 } else {
af356afa 1387 q = dev->qdisc;
1da177e4
LT
1388 }
1389
1390 /* It may be default qdisc, ignore it */
1391 if (q && q->handle == 0)
1392 q = NULL;
1393
1394 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1395 if (tcm->tcm_handle) {
09215598
AA
1396 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1397 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1da177e4 1398 return -EEXIST;
09215598
AA
1399 }
1400 if (TC_H_MIN(tcm->tcm_handle)) {
1401 NL_SET_ERR_MSG(extack, "Invalid minor handle");
1da177e4 1402 return -EINVAL;
09215598 1403 }
cc7ec456 1404 q = qdisc_lookup(dev, tcm->tcm_handle);
8ec69574 1405 if (!q)
1da177e4 1406 goto create_n_graft;
09215598
AA
1407 if (n->nlmsg_flags & NLM_F_EXCL) {
1408 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1da177e4 1409 return -EEXIST;
09215598 1410 }
0ac4bd68 1411 if (tca[TCA_KIND] &&
09215598
AA
1412 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1413 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1da177e4 1414 return -EINVAL;
09215598 1415 }
1da177e4 1416 if (q == p ||
09215598
AA
1417 (p && check_loop(q, p, 0))) {
1418 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1da177e4 1419 return -ELOOP;
09215598 1420 }
551143d8 1421 qdisc_refcount_inc(q);
1da177e4
LT
1422 goto graft;
1423 } else {
cc7ec456 1424 if (!q)
1da177e4
LT
1425 goto create_n_graft;
1426
			/* This magic test requires explanation.
			 *
			 *   We know that some child q is already
			 *   attached to this parent and have a choice:
			 *   either to change it or to create/graft a new one.
			 *
			 *   1. We are allowed to create/graft only
			 *   if both the CREATE and REPLACE flags are set.
			 *
			 *   2. If EXCL is set, the requester wanted to say
			 *   that the qdisc tcm_handle is not expected
			 *   to exist, so we choose create/graft too.
			 *
			 *   3. The last case is when no flags are set.
			 *   Alas, it is sort of a hole in the API; we
			 *   cannot decide what to do unambiguously.
			 *   For now we select create/graft if the
			 *   user gave a KIND which does not match the existing one.
			 */
cc7ec456
ED
1446 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1447 (n->nlmsg_flags & NLM_F_REPLACE) &&
1448 ((n->nlmsg_flags & NLM_F_EXCL) ||
1e90474c
PM
1449 (tca[TCA_KIND] &&
1450 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1da177e4
LT
1451 goto create_n_graft;
1452 }
1453 }
1454 } else {
09215598
AA
1455 if (!tcm->tcm_handle) {
1456 NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1da177e4 1457 return -EINVAL;
09215598 1458 }
1da177e4
LT
1459 q = qdisc_lookup(dev, tcm->tcm_handle);
1460 }
1461
1462 /* Change qdisc parameters */
09215598
AA
1463 if (!q) {
1464 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1da177e4 1465 return -ENOENT;
09215598
AA
1466 }
1467 if (n->nlmsg_flags & NLM_F_EXCL) {
1468 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1da177e4 1469 return -EEXIST;
09215598
AA
1470 }
1471 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1472 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1da177e4 1473 return -EINVAL;
09215598
AA
1474 }
1475 err = qdisc_change(q, tca, extack);
1da177e4 1476 if (err == 0)
7316ae88 1477 qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4
LT
1478 return err;
1479
1480create_n_graft:
09215598
AA
1481 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1482 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1da177e4 1483 return -ENOENT;
09215598 1484 }
24824a09 1485 if (clid == TC_H_INGRESS) {
09215598 1486 if (dev_ingress_queue(dev)) {
24824a09
ED
1487 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1488 tcm->tcm_parent, tcm->tcm_parent,
09215598
AA
1489 tca, &err, extack);
1490 } else {
1491 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
24824a09 1492 err = -ENOENT;
09215598 1493 }
24824a09 1494 } else {
926e61b7 1495 struct netdev_queue *dev_queue;
6ec1c69a
DM
1496
1497 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
926e61b7
JP
1498 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1499 else if (p)
1500 dev_queue = p->dev_queue;
1501 else
1502 dev_queue = netdev_get_tx_queue(dev, 0);
6ec1c69a 1503
926e61b7 1504 q = qdisc_create(dev, dev_queue, p,
bb949fbd 1505 tcm->tcm_parent, tcm->tcm_handle,
09215598 1506 tca, &err, extack);
6ec1c69a 1507 }
1da177e4
LT
1508 if (q == NULL) {
1509 if (err == -EAGAIN)
1510 goto replay;
1511 return err;
1512 }
1513
1514graft:
09215598 1515 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
e5befbd9
IJ
1516 if (err) {
1517 if (q)
1518 qdisc_destroy(q);
1519 return err;
1da177e4 1520 }
e5befbd9 1521
1da177e4
LT
1522 return 0;
1523}
1524
30723673
DM
1525static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1526 struct netlink_callback *cb,
49b49971
JK
1527 int *q_idx_p, int s_q_idx, bool recur,
1528 bool dump_invisible)
30723673
DM
1529{
1530 int ret = 0, q_idx = *q_idx_p;
1531 struct Qdisc *q;
59cc1f61 1532 int b;
30723673
DM
1533
1534 if (!root)
1535 return 0;
1536
1537 q = root;
1538 if (q_idx < s_q_idx) {
1539 q_idx++;
1540 } else {
49b49971 1541 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
15e47304 1542 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
5a7a5555
JHS
1543 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1544 RTM_NEWQDISC) <= 0)
30723673
DM
1545 goto done;
1546 q_idx++;
1547 }
69012ae4 1548
ea327469
JK
1549 /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1550 * itself has already been dumped.
1551 *
1552 * If we've already dumped the top-level (ingress) qdisc above and the global
1553 * qdisc hashtable, we don't want to hit it again
1554 */
1555 if (!qdisc_dev(root) || !recur)
69012ae4
JK
1556 goto out;
1557
59cc1f61 1558 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
30723673
DM
1559 if (q_idx < s_q_idx) {
1560 q_idx++;
1561 continue;
1562 }
49b49971 1563 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
15e47304 1564 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
5a7a5555
JHS
1565 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1566 RTM_NEWQDISC) <= 0)
30723673
DM
1567 goto done;
1568 q_idx++;
1569 }
1570
1571out:
1572 *q_idx_p = q_idx;
1573 return ret;
1574done:
1575 ret = -1;
1576 goto out;
1577}
1578
1da177e4
LT
1579static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1580{
3b1e0a65 1581 struct net *net = sock_net(skb->sk);
1da177e4
LT
1582 int idx, q_idx;
1583 int s_idx, s_q_idx;
1584 struct net_device *dev;
49b49971 1585 const struct nlmsghdr *nlh = cb->nlh;
49b49971
JK
1586 struct nlattr *tca[TCA_MAX + 1];
1587 int err;
1da177e4
LT
1588
1589 s_idx = cb->args[0];
1590 s_q_idx = q_idx = cb->args[1];
f1e9016d 1591
7562f876 1592 idx = 0;
15dc36eb 1593 ASSERT_RTNL();
49b49971 1594
7e5dd53f 1595 err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL);
49b49971
JK
1596 if (err < 0)
1597 return err;
1598
15dc36eb 1599 for_each_netdev(net, dev) {
30723673
DM
1600 struct netdev_queue *dev_queue;
1601
1da177e4 1602 if (idx < s_idx)
7562f876 1603 goto cont;
1da177e4
LT
1604 if (idx > s_idx)
1605 s_q_idx = 0;
1da177e4 1606 q_idx = 0;
30723673 1607
5a7a5555 1608 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
49b49971 1609 true, tca[TCA_DUMP_INVISIBLE]) < 0)
30723673
DM
1610 goto done;
1611
24824a09
ED
1612 dev_queue = dev_ingress_queue(dev);
1613 if (dev_queue &&
1614 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
49b49971
JK
1615 &q_idx, s_q_idx, false,
1616 tca[TCA_DUMP_INVISIBLE]) < 0)
30723673
DM
1617 goto done;
1618
7562f876
PE
1619cont:
1620 idx++;
1da177e4
LT
1621 }
1622
1623done:
1da177e4
LT
1624 cb->args[0] = idx;
1625 cb->args[1] = q_idx;
1626
1627 return skb->len;
1628}
1629
1630
1631
1632/************************************************
1633 * Traffic classes manipulation. *
1634 ************************************************/
1635
27d7f07c
WC
1636static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1637 unsigned long cl,
1638 u32 portid, u32 seq, u16 flags, int event)
1639{
1640 struct tcmsg *tcm;
1641 struct nlmsghdr *nlh;
1642 unsigned char *b = skb_tail_pointer(skb);
1643 struct gnet_dump d;
1644 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1da177e4 1645
27d7f07c
WC
1646 cond_resched();
1647 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1648 if (!nlh)
1649 goto out_nlmsg_trim;
1650 tcm = nlmsg_data(nlh);
1651 tcm->tcm_family = AF_UNSPEC;
1652 tcm->tcm__pad1 = 0;
1653 tcm->tcm__pad2 = 0;
1654 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1655 tcm->tcm_parent = q->handle;
1656 tcm->tcm_handle = q->handle;
1657 tcm->tcm_info = 0;
1658 if (nla_put_string(skb, TCA_KIND, q->ops->id))
1659 goto nla_put_failure;
1660 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1661 goto nla_put_failure;
1662
1663 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1664 NULL, &d, TCA_PAD) < 0)
1665 goto nla_put_failure;
1666
1667 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1668 goto nla_put_failure;
1669
1670 if (gnet_stats_finish_copy(&d) < 0)
1671 goto nla_put_failure;
1672
1673 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1674 return skb->len;
1675
1676out_nlmsg_trim:
1677nla_put_failure:
1678 nlmsg_trim(skb, b);
1679 return -1;
1680}
1681
1682static int tclass_notify(struct net *net, struct sk_buff *oskb,
1683 struct nlmsghdr *n, struct Qdisc *q,
1684 unsigned long cl, int event)
1685{
1686 struct sk_buff *skb;
1687 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1688
1689 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1690 if (!skb)
1691 return -ENOBUFS;
1692
1693 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1694 kfree_skb(skb);
1695 return -EINVAL;
1696 }
1697
1698 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1699 n->nlmsg_flags & NLM_F_ECHO);
1700}
1da177e4 1701
14546ba1
WC
1702static int tclass_del_notify(struct net *net,
1703 const struct Qdisc_class_ops *cops,
1704 struct sk_buff *oskb, struct nlmsghdr *n,
1705 struct Qdisc *q, unsigned long cl)
1706{
1707 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1708 struct sk_buff *skb;
1709 int err = 0;
1710
1711 if (!cops->delete)
1712 return -EOPNOTSUPP;
1713
1714 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1715 if (!skb)
1716 return -ENOBUFS;
1717
1718 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1719 RTM_DELTCLASS) < 0) {
1720 kfree_skb(skb);
1721 return -EINVAL;
1722 }
1723
1724 err = cops->delete(q, cl);
1725 if (err) {
1726 kfree_skb(skb);
1727 return err;
1728 }
1729
1730 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1731 n->nlmsg_flags & NLM_F_ECHO);
1732}
1733
07d79fc7
CW
1734#ifdef CONFIG_NET_CLS
1735
1736struct tcf_bind_args {
1737 struct tcf_walker w;
1738 u32 classid;
1739 unsigned long cl;
1740};
1741
1742static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1743{
1744 struct tcf_bind_args *a = (void *)arg;
1745
1746 if (tp->ops->bind_class) {
74e3be60
JP
1747 struct Qdisc *q = tcf_block_q(tp->chain->block);
1748
1749 sch_tree_lock(q);
07d79fc7 1750 tp->ops->bind_class(n, a->classid, a->cl);
74e3be60 1751 sch_tree_unlock(q);
07d79fc7
CW
1752 }
1753 return 0;
1754}
1755
1756static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1757 unsigned long new_cl)
1758{
1759 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1760 struct tcf_block *block;
1761 struct tcf_chain *chain;
1762 unsigned long cl;
1763
1764 cl = cops->find(q, portid);
1765 if (!cl)
1766 return;
cbaacc4e 1767 block = cops->tcf_block(q, cl, NULL);
07d79fc7
CW
1768 if (!block)
1769 return;
1770 list_for_each_entry(chain, &block->chain_list, list) {
1771 struct tcf_proto *tp;
1772
1773 for (tp = rtnl_dereference(chain->filter_chain);
1774 tp; tp = rtnl_dereference(tp->next)) {
1775 struct tcf_bind_args arg = {};
1776
1777 arg.w.fn = tcf_node_bind;
1778 arg.classid = clid;
1779 arg.cl = new_cl;
1780 tp->ops->walk(tp, &arg.w);
1781 }
1782 }
1783}
1784
1785#else
1786
1787static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1788 unsigned long new_cl)
1789{
1790}
1791
1792#endif
1793
c21ef3e3
DA
1794static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1795 struct netlink_ext_ack *extack)
1da177e4 1796{
3b1e0a65 1797 struct net *net = sock_net(skb->sk);
02ef22ca 1798 struct tcmsg *tcm = nlmsg_data(n);
1e90474c 1799 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1800 struct net_device *dev;
1801 struct Qdisc *q = NULL;
20fea08b 1802 const struct Qdisc_class_ops *cops;
1da177e4
LT
1803 unsigned long cl = 0;
1804 unsigned long new_cl;
de179c8c
H
1805 u32 portid;
1806 u32 clid;
1807 u32 qid;
1da177e4
LT
1808 int err;
1809
4e8bbb81 1810 if ((n->nlmsg_type != RTM_GETTCLASS) &&
5f013c9b 1811 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef8
EB
1812 return -EPERM;
1813
c21ef3e3 1814 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL, extack);
1e90474c
PM
1815 if (err < 0)
1816 return err;
1817
de179c8c
H
1818 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1819 if (!dev)
1820 return -ENODEV;
1821
1da177e4
LT
1822 /*
1823 parent == TC_H_UNSPEC - unspecified parent.
1824 parent == TC_H_ROOT - class is root, which has no parent.
1825 parent == X:0 - parent is root class.
1826 parent == X:Y - parent is a node in hierarchy.
1827 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
1828
1829 handle == 0:0 - generate handle from kernel pool.
1830 handle == 0:Y - class is X:Y, where X:0 is qdisc.
1831 handle == X:Y - clear.
1832 handle == X:0 - root class.
1833 */
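	/*
	 * Worked example of the handle arithmetic (illustrative): for
	 * "tc class ... parent 1: classid 1:10" userspace sends
	 *	tcm_parent = TC_H_MAKE(0x10000, 0)  = 0x00010000
	 *	tcm_handle = TC_H_MAKE(0x10000, 10) = 0x0001000a
	 * so qid = TC_H_MAJ(tcm_handle) = 0x00010000 picks qdisc 1:0 below.
	 */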
1834
1835 /* Step 1. Determine qdisc handle X:0 */
1836
de179c8c
H
1837 portid = tcm->tcm_parent;
1838 clid = tcm->tcm_handle;
1839 qid = TC_H_MAJ(clid);
1840
15e47304
EB
1841 if (portid != TC_H_ROOT) {
1842 u32 qid1 = TC_H_MAJ(portid);
1da177e4
LT
1843
1844 if (qid && qid1) {
1845 /* If both majors are known, they must be identical. */
1846 if (qid != qid1)
1847 return -EINVAL;
1848 } else if (qid1) {
1849 qid = qid1;
1850 } else if (qid == 0)
af356afa 1851 qid = dev->qdisc->handle;
1da177e4
LT
1852
1853 /* Now qid is genuine qdisc handle consistent
cc7ec456
ED
1854 * both with parent and child.
1855 *
15e47304 1856 * TC_H_MAJ(portid) still may be unspecified, complete it now.
1da177e4 1857 */
15e47304
EB
1858 if (portid)
1859 portid = TC_H_MAKE(qid, portid);
1da177e4
LT
1860 } else {
1861 if (qid == 0)
af356afa 1862 qid = dev->qdisc->handle;
1da177e4
LT
1863 }
1864
1865 /* OK. Locate qdisc */
cc7ec456
ED
1866 q = qdisc_lookup(dev, qid);
1867 if (!q)
1da177e4
LT
1868 return -ENOENT;
1869
	/* And check that it supports classes */
1871 cops = q->ops->cl_ops;
1872 if (cops == NULL)
1873 return -EINVAL;
1874
1875 /* Now try to get class */
1876 if (clid == 0) {
15e47304 1877 if (portid == TC_H_ROOT)
1da177e4
LT
1878 clid = qid;
1879 } else
1880 clid = TC_H_MAKE(qid, clid);
1881
1882 if (clid)
143976ce 1883 cl = cops->find(q, clid);
1da177e4
LT
1884
1885 if (cl == 0) {
1886 err = -ENOENT;
cc7ec456
ED
1887 if (n->nlmsg_type != RTM_NEWTCLASS ||
1888 !(n->nlmsg_flags & NLM_F_CREATE))
1da177e4
LT
1889 goto out;
1890 } else {
1891 switch (n->nlmsg_type) {
10297b99 1892 case RTM_NEWTCLASS:
1da177e4 1893 err = -EEXIST;
cc7ec456 1894 if (n->nlmsg_flags & NLM_F_EXCL)
1da177e4
LT
1895 goto out;
1896 break;
1897 case RTM_DELTCLASS:
14546ba1 1898 err = tclass_del_notify(net, cops, skb, n, q, cl);
07d79fc7
CW
1899		/* Unbind the deleted class from its filters by rebinding them to classid 0 */
1900 tc_bind_tclass(q, portid, clid, 0);
1da177e4
LT
1901 goto out;
1902 case RTM_GETTCLASS:
7316ae88 1903 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1da177e4
LT
1904 goto out;
1905 default:
1906 err = -EINVAL;
1907 goto out;
1908 }
1909 }
1910
1911 new_cl = cl;
de6d5cdf
PM
1912 err = -EOPNOTSUPP;
1913 if (cops->change)
793d81d6 1914 err = cops->change(q, clid, portid, tca, &new_cl, extack);
07d79fc7 1915 if (err == 0) {
7316ae88 1916 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
07d79fc7
CW
1917		/* We just created a new class, need to do the reverse binding. */
1918 if (cl != new_cl)
1919 tc_bind_tclass(q, portid, clid, new_cl);
1920 }
1da177e4 1921out:
1da177e4
LT
1922 return err;
1923}
1924
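	For reference, the message-type and flag combinations distinguished by the
	checks above map onto the familiar tc(8) verbs. A hedged sketch of that
	mapping; the enum and helper below are invented for illustration, and the
	flag choices follow the convention used by iproute2:

	#include <linux/rtnetlink.h>

	enum class_op { CLS_ADD, CLS_CHANGE, CLS_REPLACE, CLS_DELETE, CLS_GET };

	void class_op_to_nlmsg(enum class_op op, __u16 *type, __u16 *flags)
	{
		*flags = NLM_F_REQUEST;
		switch (op) {
		case CLS_ADD:		/* fails with EEXIST if the class exists */
			*type = RTM_NEWTCLASS;
			*flags |= NLM_F_CREATE | NLM_F_EXCL;
			break;
		case CLS_CHANGE:	/* fails with ENOENT if it does not exist */
			*type = RTM_NEWTCLASS;
			break;
		case CLS_REPLACE:	/* create if missing, otherwise modify */
			*type = RTM_NEWTCLASS;
			*flags |= NLM_F_CREATE;
			break;
		case CLS_DELETE:
			*type = RTM_DELTCLASS;
			break;
		case CLS_GET:
			*type = RTM_GETTCLASS;
			break;
		}
	}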
cc7ec456
ED
1925struct qdisc_dump_args {
1926 struct qdisc_walker w;
1927 struct sk_buff *skb;
1928 struct netlink_callback *cb;
1da177e4
LT
1929};
1930
5a7a5555
JHS
1931static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
1932 struct qdisc_walker *arg)
1da177e4
LT
1933{
1934 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1935
15e47304 1936 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
5a7a5555
JHS
1937 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
1938 RTM_NEWTCLASS);
1da177e4
LT
1939}
1940
30723673
DM
1941static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1942 struct tcmsg *tcm, struct netlink_callback *cb,
1943 int *t_p, int s_t)
1944{
1945 struct qdisc_dump_args arg;
1946
49b49971 1947 if (tc_qdisc_dump_ignore(q, false) ||
30723673
DM
1948 *t_p < s_t || !q->ops->cl_ops ||
1949 (tcm->tcm_parent &&
1950 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1951 (*t_p)++;
1952 return 0;
1953 }
1954 if (*t_p > s_t)
1955 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1956 arg.w.fn = qdisc_class_dump;
1957 arg.skb = skb;
1958 arg.cb = cb;
1959 arg.w.stop = 0;
1960 arg.w.skip = cb->args[1];
1961 arg.w.count = 0;
1962 q->ops->cl_ops->walk(q, &arg.w);
1963 cb->args[1] = arg.w.count;
1964 if (arg.w.stop)
1965 return -1;
1966 (*t_p)++;
1967 return 0;
1968}
1969
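	The skip/count/stop bookkeeping filled in above is consumed inside each
	classful qdisc's cl_ops->walk. A generic, non-compilable sketch of the
	usual in-tree pattern; all example_* names are placeholders:

	static void example_walk(struct Qdisc *sch, struct qdisc_walker *arg)
	{
		struct example_sched *q = qdisc_priv(sch);	/* hypothetical private data */
		struct example_class *cl;

		if (arg->stop)
			return;

		list_for_each_entry(cl, &q->classes, list) {
			/* Skip entries already emitted by a previous dump call. */
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			/* arg->fn is qdisc_class_dump() here; a negative return
			 * means the skb is full, so flag the stop and let the
			 * next netlink dump call resume from arg->count.
			 */
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}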
1970static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1971 struct tcmsg *tcm, struct netlink_callback *cb,
1972 int *t_p, int s_t)
1973{
1974 struct Qdisc *q;
59cc1f61 1975 int b;
30723673
DM
1976
1977 if (!root)
1978 return 0;
1979
1980 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1981 return -1;
1982
69012ae4
JK
1983 if (!qdisc_dev(root))
1984 return 0;
1985
cb395b20
ED
1986 if (tcm->tcm_parent) {
1987 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
1988 if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1989 return -1;
1990 return 0;
1991 }
59cc1f61 1992 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
30723673
DM
1993 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1994 return -1;
1995 }
1996
1997 return 0;
1998}
1999
1da177e4
LT
2000static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2001{
02ef22ca 2002 struct tcmsg *tcm = nlmsg_data(cb->nlh);
3b1e0a65 2003 struct net *net = sock_net(skb->sk);
30723673 2004 struct netdev_queue *dev_queue;
1da177e4 2005 struct net_device *dev;
30723673 2006 int t, s_t;
1da177e4 2007
573ce260 2008 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1da177e4 2009 return 0;
cc7ec456
ED
2010 dev = dev_get_by_index(net, tcm->tcm_ifindex);
2011 if (!dev)
1da177e4
LT
2012 return 0;
2013
2014 s_t = cb->args[0];
2015 t = 0;
2016
af356afa 2017 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
30723673
DM
2018 goto done;
2019
24824a09
ED
2020 dev_queue = dev_ingress_queue(dev);
2021 if (dev_queue &&
2022 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2023 &t, s_t) < 0)
30723673 2024 goto done;
1da177e4 2025
30723673 2026done:
1da177e4
LT
2027 cb->args[0] = t;
2028
2029 dev_put(dev);
2030 return skb->len;
2031}
2032
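	This dump handler is what answers an RTM_GETTCLASS request with NLM_F_DUMP
	set. A minimal libmnl sketch that exercises it, assuming a device named
	"eth0" and with error handling trimmed (build with -lmnl; not part of the
	kernel tree):

	#include <stdio.h>
	#include <time.h>
	#include <sys/socket.h>
	#include <net/if.h>
	#include <libmnl/libmnl.h>
	#include <linux/rtnetlink.h>
	#include <linux/pkt_sched.h>

	/* Called once per RTM_NEWTCLASS reply in the dump. */
	static int print_class(const struct nlmsghdr *nlh, void *data)
	{
		struct tcmsg *tcm = mnl_nlmsg_get_payload(nlh);

		printf("class %x:%x parent %x:%x\n",
		       TC_H_MAJ(tcm->tcm_handle) >> 16, TC_H_MIN(tcm->tcm_handle),
		       TC_H_MAJ(tcm->tcm_parent) >> 16, TC_H_MIN(tcm->tcm_parent));
		return MNL_CB_OK;
	}

	int main(void)
	{
		char buf[MNL_SOCKET_BUFFER_SIZE];
		struct mnl_socket *nl = mnl_socket_open(NETLINK_ROUTE);
		struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
		struct tcmsg *tcm;
		unsigned int seq = time(NULL), portid;
		int ret;

		mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID);
		portid = mnl_socket_get_portid(nl);

		nlh->nlmsg_type = RTM_GETTCLASS;
		nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
		nlh->nlmsg_seq = seq;
		tcm = mnl_nlmsg_put_extra_header(nlh, sizeof(*tcm));
		tcm->tcm_family = AF_UNSPEC;
		tcm->tcm_ifindex = if_nametoindex("eth0");	/* assumed device */

		mnl_socket_sendto(nl, nlh, nlh->nlmsg_len);
		while ((ret = mnl_socket_recvfrom(nl, buf, sizeof(buf))) > 0) {
			ret = mnl_cb_run(buf, ret, seq, portid, print_class, NULL);
			if (ret <= MNL_CB_STOP)
				break;
		}
		mnl_socket_close(nl);
		return 0;
	}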
1da177e4
LT
2033#ifdef CONFIG_PROC_FS
2034static int psched_show(struct seq_file *seq, void *v)
2035{
2036 seq_printf(seq, "%08x %08x %08x %08x\n",
ca44d6e6 2037 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
514bca32 2038 1000000,
1e317688 2039 (u32)NSEC_PER_SEC / hrtimer_resolution);
1da177e4
LT
2040
2041 return 0;
2042}
2043
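	Userspace tools (iproute2's tc, for example) read these four hex words to
	convert between psched ticks and wall-clock units. A small reader whose
	field labels are taken directly from psched_show() above:

	#include <stdio.h>

	int main(void)
	{
		unsigned int ns_per_usec, ns_per_tick, legacy, hz;
		FILE *fp = fopen("/proc/net/psched", "r");

		if (!fp) {
			perror("/proc/net/psched");
			return 1;
		}
		if (fscanf(fp, "%x %x %x %x",
			   &ns_per_usec, &ns_per_tick, &legacy, &hz) != 4) {
			fclose(fp);
			return 1;
		}
		fclose(fp);

		printf("1 psched tick = %u ns, hrtimer granularity = %u ticks/s\n",
		       ns_per_tick, hz);
		printf("(%u ns per usec, legacy field = %u)\n", ns_per_usec, legacy);
		return 0;
	}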
2044static int psched_open(struct inode *inode, struct file *file)
2045{
7e5ab157 2046 return single_open(file, psched_show, NULL);
1da177e4
LT
2047}
2048
da7071d7 2049static const struct file_operations psched_fops = {
1da177e4
LT
2050 .owner = THIS_MODULE,
2051 .open = psched_open,
2052 .read = seq_read,
2053 .llseek = seq_lseek,
2054 .release = single_release,
10297b99 2055};
7316ae88
TG
2056
2057static int __net_init psched_net_init(struct net *net)
2058{
2059 struct proc_dir_entry *e;
2060
d4beaa66 2061 e = proc_create("psched", 0, net->proc_net, &psched_fops);
7316ae88
TG
2062 if (e == NULL)
2063 return -ENOMEM;
2064
2065 return 0;
2066}
2067
2068static void __net_exit psched_net_exit(struct net *net)
2069{
ece31ffd 2070 remove_proc_entry("psched", net->proc_net);
7316ae88
TG
2071}
2072#else
2073static int __net_init psched_net_init(struct net *net)
2074{
2075 return 0;
2076}
2077
2078static void __net_exit psched_net_exit(struct net *net)
2079{
2080}
1da177e4
LT
2081#endif
2082
7316ae88
TG
2083static struct pernet_operations psched_net_ops = {
2084 .init = psched_net_init,
2085 .exit = psched_net_exit,
2086};
2087
1da177e4
LT
2088static int __init pktsched_init(void)
2089{
7316ae88
TG
2090 int err;
2091
2092 err = register_pernet_subsys(&psched_net_ops);
2093 if (err) {
cc7ec456 2094 pr_err("pktsched_init: "
7316ae88
TG
2095 "cannot initialize per netns operations\n");
2096 return err;
2097 }
2098
6da7c8fc 2099 register_qdisc(&pfifo_fast_ops);
1da177e4
LT
2100 register_qdisc(&pfifo_qdisc_ops);
2101 register_qdisc(&bfifo_qdisc_ops);
57dbb2d8 2102 register_qdisc(&pfifo_head_drop_qdisc_ops);
6ec1c69a 2103 register_qdisc(&mq_qdisc_ops);
d66d6c31 2104 register_qdisc(&noqueue_qdisc_ops);
1da177e4 2105
b97bac64
FW
2106 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2107 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
5a7a5555 2108 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
b97bac64
FW
2109 0);
2110 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2111 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
5a7a5555 2112 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
b97bac64 2113 0);
be577ddc 2114
1da177e4
LT
2115 return 0;
2116}
2117
2118subsys_initcall(pktsched_init);
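	register_qdisc() as used in pktsched_init() above is the same hook an
	out-of-tree scheduler module would call. A hedged skeleton: the "myqdisc"
	name is invented and the ops table is deliberately incomplete, so this is
	a sketch of the registration pattern, not a working qdisc:

	#include <linux/module.h>
	#include <net/pkt_sched.h>

	/* A real scheduler must also fill in enqueue/dequeue/peek, priv_size, etc. */
	static struct Qdisc_ops my_qdisc_ops __read_mostly = {
		.id	= "myqdisc",
		.owner	= THIS_MODULE,
	};

	static int __init my_qdisc_module_init(void)
	{
		return register_qdisc(&my_qdisc_ops);
	}

	static void __exit my_qdisc_module_exit(void)
	{
		unregister_qdisc(&my_qdisc_ops);
	}

	module_init(my_qdisc_module_init);
	module_exit(my_qdisc_module_exit);
	MODULE_LICENSE("GPL");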