Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
[linux-2.6-block.git] / net / sched / sch_api.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * net/sched/sch_api.c Packet scheduler API.
4 *
1da177e4
LT
5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6 *
7 * Fixes:
8 *
9 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
10 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
11 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
12 */
13
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
1da177e4 17#include <linux/string.h>
1da177e4 18#include <linux/errno.h>
1da177e4 19#include <linux/skbuff.h>
1da177e4
LT
20#include <linux/init.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/kmod.h>
24#include <linux/list.h>
4179477f 25#include <linux/hrtimer.h>
5a0e3ad6 26#include <linux/slab.h>
59cc1f61 27#include <linux/hashtable.h>
1da177e4 28
457c4cbc 29#include <net/net_namespace.h>
b854272b 30#include <net/sock.h>
dc5fc579 31#include <net/netlink.h>
1da177e4 32#include <net/pkt_sched.h>
07d79fc7 33#include <net/pkt_cls.h>
1da177e4 34
f5a7833e
CW
35#include <trace/events/qdisc.h>
36
1da177e4
LT
37/*
38
39 Short review.
40 -------------
41
42 This file consists of two interrelated parts:
43
44 1. queueing disciplines manager frontend.
45 2. traffic classes manager frontend.
46
47 Generally, queueing discipline ("qdisc") is a black box,
48 which is able to enqueue packets and to dequeue them (when
49 device is ready to send something) in order and at times
50 determined by algorithm hidden in it.
51
52 Qdiscs are divided into two categories:
53 - "queues", which have no internal structure visible from outside.
54 - "schedulers", which split all the packets into "traffic classes",
55 using "packet classifiers" (see cls_api.c)
56
57 In turn, classes may have child qdiscs (as rule, queues)
58 attached to them etc. etc. etc.
59
60 The goal of the routines in this file is to translate
61 information supplied by the user in the form of handles
62 into a form more intelligible to the kernel, to perform some sanity
63 checks and the part of the work that is common to all qdiscs,
64 and to provide rtnetlink notifications.
65
66 All real intelligent work is done inside qdisc modules.
67
68
69
70 Every discipline has two major routines: enqueue and dequeue.
71
72 ---dequeue
73
74 dequeue usually returns a skb to send. It is allowed to return NULL,
75 but it does not mean that queue is empty, it just means that
76 discipline does not want to send anything this time.
77 Queue is really empty if q->q.qlen == 0.
78 For complicated disciplines with multiple queues q->q is not
79 real packet queue, but however q->q.qlen must be valid.
80
81 ---enqueue
82
83 enqueue returns 0 if the packet was enqueued successfully.
84 If a packet (this one or another one) was dropped, it returns
85 a non-zero error code.
86 NET_XMIT_DROP - this packet was dropped
87 Expected action: do not back off, but wait until the queue clears.
88 NET_XMIT_CN - this packet was probably enqueued, but another one was dropped.
89 Expected action: back off or ignore
1da177e4
LT
90
91 Auxiliary routines:
92
99c0db26
JP
93 ---peek
94
95 like dequeue but without removing a packet from the queue
96
1da177e4
LT
97 ---reset
98
99 returns qdisc to initial state: purge all buffers, clear all
100 timers, counters (except for statistics) etc.
101
102 ---init
103
104 initializes newly created qdisc.
105
106 ---destroy
107
108 destroys resources allocated by init and during lifetime of qdisc.
109
110 ---change
111
112 changes qdisc parameters.
113 */
114
115/* Protects list of registered TC modules. It is pure SMP lock. */
116static DEFINE_RWLOCK(qdisc_mod_lock);
117
118
119/************************************************
120 * Queueing disciplines manipulation. *
121 ************************************************/
122
123
124/* The list of all installed queueing disciplines. */
125
126static struct Qdisc_ops *qdisc_base;
127
21eb2189 128/* Register/unregister queueing discipline */
1da177e4
LT
129
130int register_qdisc(struct Qdisc_ops *qops)
131{
132 struct Qdisc_ops *q, **qp;
133 int rc = -EEXIST;
134
135 write_lock(&qdisc_mod_lock);
136 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
137 if (!strcmp(qops->id, q->id))
138 goto out;
139
140 if (qops->enqueue == NULL)
141 qops->enqueue = noop_qdisc_ops.enqueue;
99c0db26 142 if (qops->peek == NULL) {
68fd26b5 143 if (qops->dequeue == NULL)
99c0db26 144 qops->peek = noop_qdisc_ops.peek;
68fd26b5
JP
145 else
146 goto out_einval;
99c0db26 147 }
1da177e4
LT
148 if (qops->dequeue == NULL)
149 qops->dequeue = noop_qdisc_ops.dequeue;
150
68fd26b5
JP
151 if (qops->cl_ops) {
152 const struct Qdisc_class_ops *cops = qops->cl_ops;
153
143976ce 154 if (!(cops->find && cops->walk && cops->leaf))
68fd26b5
JP
155 goto out_einval;
156
6529eaba 157 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
68fd26b5
JP
158 goto out_einval;
159 }
160
1da177e4
LT
161 qops->next = NULL;
162 *qp = qops;
163 rc = 0;
164out:
165 write_unlock(&qdisc_mod_lock);
166 return rc;
68fd26b5
JP
167
168out_einval:
169 rc = -EINVAL;
170 goto out;
1da177e4 171}
62e3ba1b 172EXPORT_SYMBOL(register_qdisc);
1da177e4
LT
173
174int unregister_qdisc(struct Qdisc_ops *qops)
175{
176 struct Qdisc_ops *q, **qp;
177 int err = -ENOENT;
178
179 write_lock(&qdisc_mod_lock);
cc7ec456 180 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
1da177e4
LT
181 if (q == qops)
182 break;
183 if (q) {
184 *qp = q->next;
185 q->next = NULL;
186 err = 0;
187 }
188 write_unlock(&qdisc_mod_lock);
189 return err;
190}
62e3ba1b 191EXPORT_SYMBOL(unregister_qdisc);
1da177e4 192
6da7c8fc 193/* Get default qdisc if not otherwise specified */
194void qdisc_get_default(char *name, size_t len)
195{
196 read_lock(&qdisc_mod_lock);
197 strlcpy(name, default_qdisc_ops->id, len);
198 read_unlock(&qdisc_mod_lock);
199}
200
201static struct Qdisc_ops *qdisc_lookup_default(const char *name)
202{
203 struct Qdisc_ops *q = NULL;
204
205 for (q = qdisc_base; q; q = q->next) {
206 if (!strcmp(name, q->id)) {
207 if (!try_module_get(q->owner))
208 q = NULL;
209 break;
210 }
211 }
212
213 return q;
214}
215
216/* Set new default qdisc to use */
217int qdisc_set_default(const char *name)
218{
219 const struct Qdisc_ops *ops;
220
221 if (!capable(CAP_NET_ADMIN))
222 return -EPERM;
223
224 write_lock(&qdisc_mod_lock);
225 ops = qdisc_lookup_default(name);
226 if (!ops) {
227 /* Not found, drop lock and try to load module */
228 write_unlock(&qdisc_mod_lock);
229 request_module("sch_%s", name);
230 write_lock(&qdisc_mod_lock);
231
232 ops = qdisc_lookup_default(name);
233 }
234
235 if (ops) {
236 /* Set new default */
237 module_put(default_qdisc_ops->owner);
238 default_qdisc_ops = ops;
239 }
240 write_unlock(&qdisc_mod_lock);
241
242 return ops ? 0 : -ENOENT;
243}
244
#ifdef CONFIG_NET_SCH_DEFAULT
/* Late initcall: install the default qdisc chosen in the kernel config. */
static int __init sch_default_qdisc(void)
{
	int err = qdisc_set_default(CONFIG_DEFAULT_NET_SCH);

	return err;
}
late_initcall(sch_default_qdisc);
#endif
253
1da177e4 254/* We know handle. Find qdisc among all qdisc's attached to device
4eaf3b84
ED
255 * (root qdisc, all its children, children of children etc.)
256 * Note: caller either uses rtnl or rcu_read_lock()
1da177e4
LT
257 */
258
6113b748 259static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
8123b421
DM
260{
261 struct Qdisc *q;
262
69012ae4
JK
263 if (!qdisc_dev(root))
264 return (root->handle == handle ? root : NULL);
265
8123b421
DM
266 if (!(root->flags & TCQ_F_BUILTIN) &&
267 root->handle == handle)
268 return root;
269
a8b7b2d0
JP
270 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
271 lockdep_rtnl_is_held()) {
8123b421
DM
272 if (q->handle == handle)
273 return q;
274 }
275 return NULL;
276}
277
49b49971 278void qdisc_hash_add(struct Qdisc *q, bool invisible)
f6e0b239 279{
37314363 280 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
4eaf3b84 281 ASSERT_RTNL();
59cc1f61 282 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
49b49971
JK
283 if (invisible)
284 q->flags |= TCQ_F_INVISIBLE;
37314363 285 }
f6e0b239 286}
59cc1f61 287EXPORT_SYMBOL(qdisc_hash_add);
f6e0b239 288
59cc1f61 289void qdisc_hash_del(struct Qdisc *q)
f6e0b239 290{
4eaf3b84
ED
291 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
292 ASSERT_RTNL();
59cc1f61 293 hash_del_rcu(&q->hash);
4eaf3b84 294 }
f6e0b239 295}
59cc1f61 296EXPORT_SYMBOL(qdisc_hash_del);
f6e0b239 297
ead81cc5 298struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
1da177e4 299{
f6e0b239
JP
300 struct Qdisc *q;
301
50317fce
CW
302 if (!handle)
303 return NULL;
af356afa
PM
304 q = qdisc_match_from_root(dev->qdisc, handle);
305 if (q)
306 goto out;
f6e0b239 307
24824a09
ED
308 if (dev_ingress_queue(dev))
309 q = qdisc_match_from_root(
310 dev_ingress_queue(dev)->qdisc_sleeping,
311 handle);
f6486d40 312out:
f6e0b239 313 return q;
1da177e4
LT
314}
315
3a7d0d07
VB
316struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
317{
318 struct netdev_queue *nq;
319 struct Qdisc *q;
320
321 if (!handle)
322 return NULL;
323 q = qdisc_match_from_root(dev->qdisc, handle);
324 if (q)
325 goto out;
326
327 nq = dev_ingress_queue_rcu(dev);
328 if (nq)
329 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
330out:
331 return q;
332}
333
1da177e4
LT
334static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
335{
336 unsigned long cl;
20fea08b 337 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
1da177e4
LT
338
339 if (cops == NULL)
340 return NULL;
143976ce 341 cl = cops->find(p, classid);
1da177e4
LT
342
343 if (cl == 0)
344 return NULL;
2561f972 345 return cops->leaf(p, cl);
1da177e4
LT
346}
347
348/* Find queueing discipline by name */
349
1e90474c 350static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4
LT
351{
352 struct Qdisc_ops *q = NULL;
353
354 if (kind) {
355 read_lock(&qdisc_mod_lock);
356 for (q = qdisc_base; q; q = q->next) {
1e90474c 357 if (nla_strcmp(kind, q->id) == 0) {
1da177e4
LT
358 if (!try_module_get(q->owner))
359 q = NULL;
360 break;
361 }
362 }
363 read_unlock(&qdisc_mod_lock);
364 }
365 return q;
366}
367
8a8e3d84
JDB
368/* The linklayer setting were not transferred from iproute2, in older
369 * versions, and the rate tables lookup systems have been dropped in
370 * the kernel. To keep backward compatible with older iproute2 tc
371 * utils, we detect the linklayer setting by detecting if the rate
372 * table were modified.
373 *
374 * For linklayer ATM table entries, the rate table will be aligned to
375 * 48 bytes, thus some table entries will contain the same value. The
376 * mpu (min packet unit) is also encoded into the old rate table, thus
377 * starting from the mpu, we find low and high table entries for
378 * mapping this cell. If these entries contain the same value, when
379 * the rate tables have been modified for linklayer ATM.
380 *
381 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
382 * and then roundup to the next cell, calc the table entry one below,
383 * and compare.
384 */
385static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
386{
387 int low = roundup(r->mpu, 48);
388 int high = roundup(low+1, 48);
389 int cell_low = low >> r->cell_log;
390 int cell_high = (high >> r->cell_log) - 1;
391
392 /* rtab is too inaccurate at rates > 100Mbit/s */
393 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
394 pr_debug("TC linklayer: Giving up ATM detection\n");
395 return TC_LINKLAYER_ETHERNET;
396 }
397
398 if ((cell_high > cell_low) && (cell_high < 256)
399 && (rtab[cell_low] == rtab[cell_high])) {
400 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
401 cell_low, cell_high, rtab[cell_high]);
402 return TC_LINKLAYER_ATM;
403 }
404 return TC_LINKLAYER_ETHERNET;
405}
406
1da177e4
LT
407static struct qdisc_rate_table *qdisc_rtab_list;
408
5a7a5555 409struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
e9bc3fa2
AA
410 struct nlattr *tab,
411 struct netlink_ext_ack *extack)
1da177e4
LT
412{
413 struct qdisc_rate_table *rtab;
414
40edeff6 415 if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
e9bc3fa2
AA
416 nla_len(tab) != TC_RTAB_SIZE) {
417 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
40edeff6 418 return NULL;
e9bc3fa2 419 }
40edeff6 420
1da177e4 421 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
40edeff6
ED
422 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
423 !memcmp(&rtab->data, nla_data(tab), 1024)) {
1da177e4
LT
424 rtab->refcnt++;
425 return rtab;
426 }
427 }
428
1da177e4
LT
429 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
430 if (rtab) {
431 rtab->rate = *r;
432 rtab->refcnt = 1;
1e90474c 433 memcpy(rtab->data, nla_data(tab), 1024);
8a8e3d84
JDB
434 if (r->linklayer == TC_LINKLAYER_UNAWARE)
435 r->linklayer = __detect_linklayer(r, rtab->data);
1da177e4
LT
436 rtab->next = qdisc_rtab_list;
437 qdisc_rtab_list = rtab;
e9bc3fa2
AA
438 } else {
439 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
1da177e4
LT
440 }
441 return rtab;
442}
62e3ba1b 443EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4
LT
444
445void qdisc_put_rtab(struct qdisc_rate_table *tab)
446{
447 struct qdisc_rate_table *rtab, **rtabp;
448
449 if (!tab || --tab->refcnt)
450 return;
451
cc7ec456
ED
452 for (rtabp = &qdisc_rtab_list;
453 (rtab = *rtabp) != NULL;
454 rtabp = &rtab->next) {
1da177e4
LT
455 if (rtab == tab) {
456 *rtabp = rtab->next;
457 kfree(rtab);
458 return;
459 }
460 }
461}
62e3ba1b 462EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4 463
175f9c1b 464static LIST_HEAD(qdisc_stab_list);
175f9c1b
JK
465
466static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
467 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
468 [TCA_STAB_DATA] = { .type = NLA_BINARY },
469};
470
09215598
AA
471static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
472 struct netlink_ext_ack *extack)
175f9c1b
JK
473{
474 struct nlattr *tb[TCA_STAB_MAX + 1];
475 struct qdisc_size_table *stab;
476 struct tc_sizespec *s;
477 unsigned int tsize = 0;
478 u16 *tab = NULL;
479 int err;
480
8cb08174
JB
481 err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
482 extack);
175f9c1b
JK
483 if (err < 0)
484 return ERR_PTR(err);
09215598
AA
485 if (!tb[TCA_STAB_BASE]) {
486 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
175f9c1b 487 return ERR_PTR(-EINVAL);
09215598 488 }
175f9c1b
JK
489
490 s = nla_data(tb[TCA_STAB_BASE]);
491
492 if (s->tsize > 0) {
09215598
AA
493 if (!tb[TCA_STAB_DATA]) {
494 NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
175f9c1b 495 return ERR_PTR(-EINVAL);
09215598 496 }
175f9c1b
JK
497 tab = nla_data(tb[TCA_STAB_DATA]);
498 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
499 }
500
09215598
AA
501 if (tsize != s->tsize || (!tab && tsize > 0)) {
502 NL_SET_ERR_MSG(extack, "Invalid size of size table");
175f9c1b 503 return ERR_PTR(-EINVAL);
09215598 504 }
175f9c1b 505
175f9c1b
JK
506 list_for_each_entry(stab, &qdisc_stab_list, list) {
507 if (memcmp(&stab->szopts, s, sizeof(*s)))
508 continue;
509 if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
510 continue;
511 stab->refcnt++;
175f9c1b
JK
512 return stab;
513 }
514
175f9c1b
JK
515 stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
516 if (!stab)
517 return ERR_PTR(-ENOMEM);
518
519 stab->refcnt = 1;
520 stab->szopts = *s;
521 if (tsize > 0)
522 memcpy(stab->data, tab, tsize * sizeof(u16));
523
175f9c1b 524 list_add_tail(&stab->list, &qdisc_stab_list);
175f9c1b
JK
525
526 return stab;
527}
528
529void qdisc_put_stab(struct qdisc_size_table *tab)
530{
531 if (!tab)
532 return;
533
175f9c1b
JK
534 if (--tab->refcnt == 0) {
535 list_del(&tab->list);
6e07902f 536 kfree_rcu(tab, rcu);
175f9c1b 537 }
175f9c1b
JK
538}
539EXPORT_SYMBOL(qdisc_put_stab);
540
541static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
542{
543 struct nlattr *nest;
544
ae0be8de 545 nest = nla_nest_start_noflag(skb, TCA_STAB);
3aa4614d
PM
546 if (nest == NULL)
547 goto nla_put_failure;
1b34ec43
DM
548 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
549 goto nla_put_failure;
175f9c1b
JK
550 nla_nest_end(skb, nest);
551
552 return skb->len;
553
554nla_put_failure:
555 return -1;
556}
557
5a7a5555
JHS
558void __qdisc_calculate_pkt_len(struct sk_buff *skb,
559 const struct qdisc_size_table *stab)
175f9c1b
JK
560{
561 int pkt_len, slot;
562
563 pkt_len = skb->len + stab->szopts.overhead;
564 if (unlikely(!stab->szopts.tsize))
565 goto out;
566
567 slot = pkt_len + stab->szopts.cell_align;
568 if (unlikely(slot < 0))
569 slot = 0;
570
571 slot >>= stab->szopts.cell_log;
572 if (likely(slot < stab->szopts.tsize))
573 pkt_len = stab->data[slot];
574 else
575 pkt_len = stab->data[stab->szopts.tsize - 1] *
576 (slot / stab->szopts.tsize) +
577 stab->data[slot % stab->szopts.tsize];
578
579 pkt_len <<= stab->szopts.size_log;
580out:
581 if (unlikely(pkt_len < 1))
582 pkt_len = 1;
583 qdisc_skb_cb(skb)->pkt_len = pkt_len;
584}
a2da570d 585EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
175f9c1b 586
6e765a00 587void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
b00355db
JP
588{
589 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
cc7ec456
ED
590 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
591 txt, qdisc->ops->id, qdisc->handle >> 16);
b00355db
JP
592 qdisc->flags |= TCQ_F_WARN_NONWC;
593 }
594}
595EXPORT_SYMBOL(qdisc_warn_nonwc);
596
4179477f
PM
597static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
598{
599 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
2fbd3da3 600 timer);
4179477f 601
1e203c1a 602 rcu_read_lock();
8608db03 603 __netif_schedule(qdisc_root(wd->qdisc));
1e203c1a 604 rcu_read_unlock();
1936502d 605
4179477f
PM
606 return HRTIMER_NORESTART;
607}
608
860b642b
VCG
609void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
610 clockid_t clockid)
4179477f 611{
860b642b 612 hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
2fbd3da3 613 wd->timer.function = qdisc_watchdog;
4179477f
PM
614 wd->qdisc = qdisc;
615}
860b642b
VCG
616EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
617
618void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
619{
620 qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
621}
4179477f
PM
622EXPORT_SYMBOL(qdisc_watchdog_init);
623
efe074c2
ED
624void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
625 u64 delta_ns)
4179477f 626{
2540e051
JP
627 if (test_bit(__QDISC_STATE_DEACTIVATED,
628 &qdisc_root_sleeping(wd->qdisc)->state))
629 return;
630
b88948fb
ED
631 if (hrtimer_is_queued(&wd->timer)) {
632 /* If timer is already set in [expires, expires + delta_ns],
633 * do not reprogram it.
634 */
635 if (wd->last_expires - expires <= delta_ns)
636 return;
637 }
a9efad8b
ED
638
639 wd->last_expires = expires;
efe074c2
ED
640 hrtimer_start_range_ns(&wd->timer,
641 ns_to_ktime(expires),
642 delta_ns,
643 HRTIMER_MODE_ABS_PINNED);
4179477f 644}
efe074c2 645EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
4179477f
PM
646
647void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
648{
2fbd3da3 649 hrtimer_cancel(&wd->timer);
4179477f
PM
650}
651EXPORT_SYMBOL(qdisc_watchdog_cancel);
1da177e4 652
a94f779f 653static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
6fe1c7a5 654{
6fe1c7a5 655 struct hlist_head *h;
9695fe6f 656 unsigned int i;
6fe1c7a5 657
9695fe6f 658 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
6fe1c7a5
PM
659
660 if (h != NULL) {
661 for (i = 0; i < n; i++)
662 INIT_HLIST_HEAD(&h[i]);
663 }
664 return h;
665}
666
6fe1c7a5
PM
667void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
668{
669 struct Qdisc_class_common *cl;
b67bfe0d 670 struct hlist_node *next;
6fe1c7a5
PM
671 struct hlist_head *nhash, *ohash;
672 unsigned int nsize, nmask, osize;
673 unsigned int i, h;
674
675 /* Rehash when load factor exceeds 0.75 */
676 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
677 return;
678 nsize = clhash->hashsize * 2;
679 nmask = nsize - 1;
680 nhash = qdisc_class_hash_alloc(nsize);
681 if (nhash == NULL)
682 return;
683
684 ohash = clhash->hash;
685 osize = clhash->hashsize;
686
687 sch_tree_lock(sch);
688 for (i = 0; i < osize; i++) {
b67bfe0d 689 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
6fe1c7a5
PM
690 h = qdisc_class_hash(cl->classid, nmask);
691 hlist_add_head(&cl->hnode, &nhash[h]);
692 }
693 }
694 clhash->hash = nhash;
695 clhash->hashsize = nsize;
696 clhash->hashmask = nmask;
697 sch_tree_unlock(sch);
698
9695fe6f 699 kvfree(ohash);
6fe1c7a5
PM
700}
701EXPORT_SYMBOL(qdisc_class_hash_grow);
702
703int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
704{
705 unsigned int size = 4;
706
707 clhash->hash = qdisc_class_hash_alloc(size);
ac8ef4ab 708 if (!clhash->hash)
6fe1c7a5
PM
709 return -ENOMEM;
710 clhash->hashsize = size;
711 clhash->hashmask = size - 1;
712 clhash->hashelems = 0;
713 return 0;
714}
715EXPORT_SYMBOL(qdisc_class_hash_init);
716
717void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
718{
9695fe6f 719 kvfree(clhash->hash);
6fe1c7a5
PM
720}
721EXPORT_SYMBOL(qdisc_class_hash_destroy);
722
723void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
724 struct Qdisc_class_common *cl)
725{
726 unsigned int h;
727
728 INIT_HLIST_NODE(&cl->hnode);
729 h = qdisc_class_hash(cl->classid, clhash->hashmask);
730 hlist_add_head(&cl->hnode, &clhash->hash[h]);
731 clhash->hashelems++;
732}
733EXPORT_SYMBOL(qdisc_class_hash_insert);
734
735void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
736 struct Qdisc_class_common *cl)
737{
738 hlist_del(&cl->hnode);
739 clhash->hashelems--;
740}
741EXPORT_SYMBOL(qdisc_class_hash_remove);
742
fa0f5aa7
ED
743/* Allocate an unique handle from space managed by kernel
744 * Possible range is [8000-FFFF]:0000 (0x8000 values)
745 */
1da177e4
LT
746static u32 qdisc_alloc_handle(struct net_device *dev)
747{
fa0f5aa7 748 int i = 0x8000;
1da177e4
LT
749 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
750
751 do {
752 autohandle += TC_H_MAKE(0x10000U, 0);
753 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
754 autohandle = TC_H_MAKE(0x80000000U, 0);
fa0f5aa7
ED
755 if (!qdisc_lookup(dev, autohandle))
756 return autohandle;
757 cond_resched();
758 } while (--i > 0);
1da177e4 759
fa0f5aa7 760 return 0;
1da177e4
LT
761}
762
5f2939d9 763void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
43effa1e 764{
fd5ac14a 765 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
20fea08b 766 const struct Qdisc_class_ops *cops;
43effa1e
PM
767 unsigned long cl;
768 u32 parentid;
95946658 769 bool notify;
2c8c8e6f 770 int drops;
43effa1e 771
2ccccf5f 772 if (n == 0 && len == 0)
43effa1e 773 return;
2c8c8e6f 774 drops = max_t(int, n, 0);
4eaf3b84 775 rcu_read_lock();
43effa1e 776 while ((parentid = sch->parent)) {
066a3b5b 777 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
4eaf3b84 778 break;
066a3b5b 779
4eaf3b84
ED
780 if (sch->flags & TCQ_F_NOPARENT)
781 break;
95946658
KK
782 /* Notify parent qdisc only if child qdisc becomes empty.
783 *
784 * If child was empty even before update then backlog
785 * counter is screwed and we skip notification because
786 * parent class is already passive.
fd5ac14a
NF
787 *
788 * If the original child was offloaded then it is allowed
789 * to be seem as empty, so the parent is notified anyway.
95946658 790 */
fd5ac14a
NF
791 notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
792 !qdisc_is_offloaded);
4eaf3b84 793 /* TODO: perform the search on a per txq basis */
5ce2d488 794 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
ffc8fefa 795 if (sch == NULL) {
4eaf3b84
ED
796 WARN_ON_ONCE(parentid != TC_H_ROOT);
797 break;
ffc8fefa 798 }
43effa1e 799 cops = sch->ops->cl_ops;
95946658 800 if (notify && cops->qlen_notify) {
143976ce 801 cl = cops->find(sch, parentid);
43effa1e 802 cops->qlen_notify(sch, cl);
43effa1e
PM
803 }
804 sch->q.qlen -= n;
2ccccf5f 805 sch->qstats.backlog -= len;
25331d6c 806 __qdisc_qstats_drop(sch, drops);
43effa1e 807 }
4eaf3b84 808 rcu_read_unlock();
43effa1e 809}
2ccccf5f 810EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
1da177e4 811
b592843c
JK
812int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
813 void *type_data)
814{
815 struct net_device *dev = qdisc_dev(sch);
816 int err;
817
818 sch->flags &= ~TCQ_F_OFFLOADED;
819 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
820 return 0;
821
822 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
823 if (err == -EOPNOTSUPP)
824 return 0;
825
826 if (!err)
827 sch->flags |= TCQ_F_OFFLOADED;
828
829 return err;
830}
831EXPORT_SYMBOL(qdisc_offload_dump_helper);
832
bfaee911
JK
833void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
834 struct Qdisc *new, struct Qdisc *old,
835 enum tc_setup_type type, void *type_data,
836 struct netlink_ext_ack *extack)
837{
838 bool any_qdisc_is_offloaded;
839 int err;
840
841 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
842 return;
843
844 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
845
846 /* Don't report error if the graft is part of destroy operation. */
847 if (!err || !new || new == &noop_qdisc)
848 return;
849
850 /* Don't report error if the parent, the old child and the new
851 * one are not offloaded.
852 */
853 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
854 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
855 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
856
857 if (any_qdisc_is_offloaded)
858 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
859}
860EXPORT_SYMBOL(qdisc_offload_graft_helper);
861
98b0e5f6
JK
862static void qdisc_offload_graft_root(struct net_device *dev,
863 struct Qdisc *new, struct Qdisc *old,
864 struct netlink_ext_ack *extack)
865{
866 struct tc_root_qopt_offload graft_offload = {
867 .command = TC_ROOT_GRAFT,
868 .handle = new ? new->handle : 0,
869 .ingress = (new && new->flags & TCQ_F_INGRESS) ||
870 (old && old->flags & TCQ_F_INGRESS),
871 };
872
873 qdisc_offload_graft_helper(dev, NULL, new, old,
874 TC_SETUP_ROOT_QDISC, &graft_offload, extack);
875}
876
27d7f07c
WC
877static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
878 u32 portid, u32 seq, u16 flags, int event)
879{
880 struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL;
881 struct gnet_stats_queue __percpu *cpu_qstats = NULL;
882 struct tcmsg *tcm;
883 struct nlmsghdr *nlh;
884 unsigned char *b = skb_tail_pointer(skb);
885 struct gnet_dump d;
886 struct qdisc_size_table *stab;
d47a6b0e 887 u32 block_index;
27d7f07c
WC
888 __u32 qlen;
889
890 cond_resched();
891 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
892 if (!nlh)
893 goto out_nlmsg_trim;
894 tcm = nlmsg_data(nlh);
895 tcm->tcm_family = AF_UNSPEC;
896 tcm->tcm__pad1 = 0;
897 tcm->tcm__pad2 = 0;
898 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
899 tcm->tcm_parent = clid;
900 tcm->tcm_handle = q->handle;
901 tcm->tcm_info = refcount_read(&q->refcnt);
902 if (nla_put_string(skb, TCA_KIND, q->ops->id))
903 goto nla_put_failure;
d47a6b0e
JP
904 if (q->ops->ingress_block_get) {
905 block_index = q->ops->ingress_block_get(q);
906 if (block_index &&
907 nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
908 goto nla_put_failure;
909 }
910 if (q->ops->egress_block_get) {
911 block_index = q->ops->egress_block_get(q);
912 if (block_index &&
913 nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
914 goto nla_put_failure;
915 }
27d7f07c
WC
916 if (q->ops->dump && q->ops->dump(q, skb) < 0)
917 goto nla_put_failure;
44edf2f8
NF
918 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
919 goto nla_put_failure;
7e66016f 920 qlen = qdisc_qlen_sum(q);
27d7f07c
WC
921
922 stab = rtnl_dereference(q->stab);
923 if (stab && qdisc_dump_stab(skb, stab) < 0)
924 goto nla_put_failure;
925
926 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
927 NULL, &d, TCA_PAD) < 0)
928 goto nla_put_failure;
929
930 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
931 goto nla_put_failure;
932
933 if (qdisc_is_percpu_stats(q)) {
934 cpu_bstats = q->cpu_bstats;
935 cpu_qstats = q->cpu_qstats;
936 }
937
938 if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
939 &d, cpu_bstats, &q->bstats) < 0 ||
940 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
941 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
942 goto nla_put_failure;
943
944 if (gnet_stats_finish_copy(&d) < 0)
945 goto nla_put_failure;
946
947 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
948 return skb->len;
949
950out_nlmsg_trim:
951nla_put_failure:
952 nlmsg_trim(skb, b);
953 return -1;
954}
955
956static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
957{
958 if (q->flags & TCQ_F_BUILTIN)
959 return true;
960 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
961 return true;
962
963 return false;
964}
965
966static int qdisc_notify(struct net *net, struct sk_buff *oskb,
967 struct nlmsghdr *n, u32 clid,
968 struct Qdisc *old, struct Qdisc *new)
969{
970 struct sk_buff *skb;
971 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
972
973 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
974 if (!skb)
975 return -ENOBUFS;
976
977 if (old && !tc_qdisc_dump_ignore(old, false)) {
978 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
979 0, RTM_DELQDISC) < 0)
980 goto err_out;
981 }
982 if (new && !tc_qdisc_dump_ignore(new, false)) {
983 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
984 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
985 goto err_out;
986 }
987
988 if (skb->len)
989 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
990 n->nlmsg_flags & NLM_F_ECHO);
991
992err_out:
993 kfree_skb(skb);
994 return -EINVAL;
995}
996
7316ae88
TG
997static void notify_and_destroy(struct net *net, struct sk_buff *skb,
998 struct nlmsghdr *n, u32 clid,
99194cff
DM
999 struct Qdisc *old, struct Qdisc *new)
1000{
1001 if (new || old)
7316ae88 1002 qdisc_notify(net, skb, n, clid, old, new);
1da177e4 1003
4d8863a2 1004 if (old)
86bd446b 1005 qdisc_put(old);
99194cff
DM
1006}
1007
8a53e616
PA
1008static void qdisc_clear_nolock(struct Qdisc *sch)
1009{
1010 sch->flags &= ~TCQ_F_NOLOCK;
1011 if (!(sch->flags & TCQ_F_CPUSTATS))
1012 return;
1013
1014 free_percpu(sch->cpu_bstats);
1015 free_percpu(sch->cpu_qstats);
1016 sch->cpu_bstats = NULL;
1017 sch->cpu_qstats = NULL;
1018 sch->flags &= ~TCQ_F_CPUSTATS;
1019}
1020
99194cff
DM
1021/* Graft qdisc "new" to class "classid" of qdisc "parent" or
1022 * to device "dev".
1023 *
1024 * When appropriate send a netlink notification using 'skb'
1025 * and "n".
1026 *
1027 * On success, destroy old qdisc.
1da177e4
LT
1028 */
1029
1030static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
99194cff 1031 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
09215598
AA
1032 struct Qdisc *new, struct Qdisc *old,
1033 struct netlink_ext_ack *extack)
1da177e4 1034{
99194cff 1035 struct Qdisc *q = old;
7316ae88 1036 struct net *net = dev_net(dev);
1da177e4 1037
10297b99 1038 if (parent == NULL) {
99194cff
DM
1039 unsigned int i, num_q, ingress;
1040
1041 ingress = 0;
1042 num_q = dev->num_tx_queues;
8d50b53d
DM
1043 if ((q && q->flags & TCQ_F_INGRESS) ||
1044 (new && new->flags & TCQ_F_INGRESS)) {
99194cff
DM
1045 num_q = 1;
1046 ingress = 1;
09215598
AA
1047 if (!dev_ingress_queue(dev)) {
1048 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
24824a09 1049 return -ENOENT;
09215598 1050 }
99194cff
DM
1051 }
1052
1053 if (dev->flags & IFF_UP)
1054 dev_deactivate(dev);
1055
98b0e5f6
JK
1056 qdisc_offload_graft_root(dev, new, old, extack);
1057
86e363dc
WC
1058 if (new && new->ops->attach)
1059 goto skip;
6ec1c69a 1060
99194cff 1061 for (i = 0; i < num_q; i++) {
24824a09 1062 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
99194cff
DM
1063
1064 if (!ingress)
1065 dev_queue = netdev_get_tx_queue(dev, i);
1066
8d50b53d
DM
1067 old = dev_graft_qdisc(dev_queue, new);
1068 if (new && i > 0)
551143d8 1069 qdisc_refcount_inc(new);
8d50b53d 1070
036d6a67 1071 if (!ingress)
86bd446b 1072 qdisc_put(old);
1da177e4 1073 }
99194cff 1074
86e363dc 1075skip:
036d6a67 1076 if (!ingress) {
7316ae88
TG
1077 notify_and_destroy(net, skb, n, classid,
1078 dev->qdisc, new);
036d6a67 1079 if (new && !new->ops->attach)
551143d8 1080 qdisc_refcount_inc(new);
036d6a67 1081 dev->qdisc = new ? : &noop_qdisc;
86e363dc
WC
1082
1083 if (new && new->ops->attach)
1084 new->ops->attach(new);
036d6a67 1085 } else {
7316ae88 1086 notify_and_destroy(net, skb, n, classid, old, new);
036d6a67 1087 }
af356afa 1088
99194cff
DM
1089 if (dev->flags & IFF_UP)
1090 dev_activate(dev);
1da177e4 1091 } else {
20fea08b 1092 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
9da93ece
JK
1093 unsigned long cl;
1094 int err;
1da177e4 1095
c5ad119f 1096 /* Only support running class lockless if parent is lockless */
8eaf8d99 1097 if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
8a53e616 1098 qdisc_clear_nolock(new);
c5ad119f 1099
9da93ece
JK
1100 if (!cops || !cops->graft)
1101 return -EOPNOTSUPP;
143976ce 1102
9da93ece
JK
1103 cl = cops->find(parent, classid);
1104 if (!cl) {
1105 NL_SET_ERR_MSG(extack, "Specified class not found");
1106 return -ENOENT;
1da177e4 1107 }
9da93ece
JK
1108
1109 err = cops->graft(parent, cl, new, &old, extack);
1110 if (err)
1111 return err;
1112 notify_and_destroy(net, skb, n, classid, old, new);
1da177e4 1113 }
9da93ece 1114 return 0;
1da177e4
LT
1115}
1116
d47a6b0e
JP
1117static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1118 struct netlink_ext_ack *extack)
1119{
1120 u32 block_index;
1121
1122 if (tca[TCA_INGRESS_BLOCK]) {
1123 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1124
1125 if (!block_index) {
1126 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1127 return -EINVAL;
1128 }
1129 if (!sch->ops->ingress_block_set) {
1130 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1131 return -EOPNOTSUPP;
1132 }
1133 sch->ops->ingress_block_set(sch, block_index);
1134 }
1135 if (tca[TCA_EGRESS_BLOCK]) {
1136 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1137
1138 if (!block_index) {
1139 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1140 return -EINVAL;
1141 }
1142 if (!sch->ops->egress_block_set) {
1143 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1144 return -EOPNOTSUPP;
1145 }
1146 sch->ops->egress_block_set(sch, block_index);
1147 }
1148 return 0;
1149}
1150
1da177e4
LT
1151/*
1152 Allocate and initialize new qdisc.
1153
1154 Parameters are passed via opt.
1155 */
1156
5a7a5555
JHS
1157static struct Qdisc *qdisc_create(struct net_device *dev,
1158 struct netdev_queue *dev_queue,
1159 struct Qdisc *p, u32 parent, u32 handle,
09215598
AA
1160 struct nlattr **tca, int *errp,
1161 struct netlink_ext_ack *extack)
1da177e4
LT
1162{
1163 int err;
1e90474c 1164 struct nlattr *kind = tca[TCA_KIND];
1da177e4
LT
1165 struct Qdisc *sch;
1166 struct Qdisc_ops *ops;
175f9c1b 1167 struct qdisc_size_table *stab;
1da177e4
LT
1168
1169 ops = qdisc_lookup_ops(kind);
95a5afca 1170#ifdef CONFIG_MODULES
1da177e4
LT
1171 if (ops == NULL && kind != NULL) {
1172 char name[IFNAMSIZ];
872f6903 1173 if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1da177e4
LT
1174 /* We dropped the RTNL semaphore in order to
1175 * perform the module load. So, even if we
1176 * succeeded in loading the module we have to
1177 * tell the caller to replay the request. We
1178 * indicate this using -EAGAIN.
1179 * We replay the request because the device may
1180 * go away in the mean time.
1181 */
1182 rtnl_unlock();
1183 request_module("sch_%s", name);
1184 rtnl_lock();
1185 ops = qdisc_lookup_ops(kind);
1186 if (ops != NULL) {
1187 /* We will try again qdisc_lookup_ops,
1188 * so don't keep a reference.
1189 */
1190 module_put(ops->owner);
1191 err = -EAGAIN;
1192 goto err_out;
1193 }
1194 }
1195 }
1196#endif
1197
b9e2cc0f 1198 err = -ENOENT;
09215598
AA
1199 if (!ops) {
1200 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1da177e4 1201 goto err_out;
09215598 1202 }
1da177e4 1203
d0bd684d 1204 sch = qdisc_alloc(dev_queue, ops, extack);
3d54b82f
TG
1205 if (IS_ERR(sch)) {
1206 err = PTR_ERR(sch);
1da177e4 1207 goto err_out2;
3d54b82f 1208 }
1da177e4 1209
ffc8fefa
PM
1210 sch->parent = parent;
1211
3d54b82f 1212 if (handle == TC_H_INGRESS) {
1da177e4 1213 sch->flags |= TCQ_F_INGRESS;
3d54b82f 1214 handle = TC_H_MAKE(TC_H_INGRESS, 0);
fd44de7c 1215 } else {
fd44de7c
PM
1216 if (handle == 0) {
1217 handle = qdisc_alloc_handle(dev);
aaeb1dea
IV
1218 if (handle == 0) {
1219 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1220 err = -ENOSPC;
fd44de7c 1221 goto err_out3;
aaeb1dea 1222 }
fd44de7c 1223 }
1abbe139 1224 if (!netif_is_multiqueue(dev))
225734de 1225 sch->flags |= TCQ_F_ONETXQUEUE;
1da177e4
LT
1226 }
1227
3d54b82f 1228 sch->handle = handle;
1da177e4 1229
84c46dd8
JDB
1230 /* This exist to keep backward compatible with a userspace
1231 * loophole, what allowed userspace to get IFF_NO_QUEUE
1232 * facility on older kernels by setting tx_queue_len=0 (prior
1233 * to qdisc init), and then forgot to reinit tx_queue_len
1234 * before again attaching a qdisc.
1235 */
1236 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1237 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1238 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1239 }
1240
d47a6b0e
JP
1241 err = qdisc_block_indexes_set(sch, tca, extack);
1242 if (err)
1243 goto err_out3;
1244
54160ef6 1245 if (ops->init) {
e63d7dfd 1246 err = ops->init(sch, tca[TCA_OPTIONS], extack);
54160ef6
AA
1247 if (err != 0)
1248 goto err_out5;
1249 }
22e0f8b9 1250
54160ef6 1251 if (tca[TCA_STAB]) {
09215598 1252 stab = qdisc_get_stab(tca[TCA_STAB], extack);
54160ef6
AA
1253 if (IS_ERR(stab)) {
1254 err = PTR_ERR(stab);
1255 goto err_out4;
023e09a7 1256 }
54160ef6
AA
1257 rcu_assign_pointer(sch->stab, stab);
1258 }
1259 if (tca[TCA_RATE]) {
1260 seqcount_t *running;
f6e0b239 1261
54160ef6 1262 err = -EOPNOTSUPP;
09215598
AA
1263 if (sch->flags & TCQ_F_MQROOT) {
1264 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
54160ef6 1265 goto err_out4;
09215598 1266 }
1da177e4 1267
54160ef6
AA
1268 if (sch->parent != TC_H_ROOT &&
1269 !(sch->flags & TCQ_F_INGRESS) &&
1270 (!p || !(p->flags & TCQ_F_MQROOT)))
1271 running = qdisc_root_sleeping_running(sch);
1272 else
1273 running = &sch->running;
1274
1275 err = gen_new_estimator(&sch->bstats,
1276 sch->cpu_bstats,
1277 &sch->rate_est,
1278 NULL,
1279 running,
1280 tca[TCA_RATE]);
09215598
AA
1281 if (err) {
1282 NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
54160ef6 1283 goto err_out4;
09215598 1284 }
1da177e4 1285 }
54160ef6
AA
1286
1287 qdisc_hash_add(sch, false);
f5a7833e 1288 trace_qdisc_create(ops, dev, parent);
54160ef6
AA
1289
1290 return sch;
1291
1292err_out5:
87b60cfa 1293 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
c1a4872e
GF
1294 if (ops->destroy)
1295 ops->destroy(sch);
1da177e4
LT
1296err_out3:
1297 dev_put(dev);
81d947e2 1298 qdisc_free(sch);
1da177e4
LT
1299err_out2:
1300 module_put(ops->owner);
1301err_out:
1302 *errp = err;
1da177e4 1303 return NULL;
23bcf634
PM
1304
1305err_out4:
1306 /*
1307 * Any broken qdiscs that would require a ops->reset() here?
1308 * The qdisc was never in action so it shouldn't be necessary.
1309 */
a2da570d 1310 qdisc_put_stab(rtnl_dereference(sch->stab));
23bcf634
PM
1311 if (ops->destroy)
1312 ops->destroy(sch);
1313 goto err_out3;
1da177e4
LT
1314}
1315
09215598
AA
1316static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1317 struct netlink_ext_ack *extack)
1da177e4 1318{
a2da570d 1319 struct qdisc_size_table *ostab, *stab = NULL;
175f9c1b 1320 int err = 0;
1da177e4 1321
175f9c1b 1322 if (tca[TCA_OPTIONS]) {
09215598
AA
1323 if (!sch->ops->change) {
1324 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1da177e4 1325 return -EINVAL;
09215598 1326 }
d47a6b0e
JP
1327 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1328 NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1329 return -EOPNOTSUPP;
1330 }
2030721c 1331 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1da177e4
LT
1332 if (err)
1333 return err;
1334 }
175f9c1b
JK
1335
1336 if (tca[TCA_STAB]) {
09215598 1337 stab = qdisc_get_stab(tca[TCA_STAB], extack);
175f9c1b
JK
1338 if (IS_ERR(stab))
1339 return PTR_ERR(stab);
1340 }
1341
a2da570d
ED
1342 ostab = rtnl_dereference(sch->stab);
1343 rcu_assign_pointer(sch->stab, stab);
1344 qdisc_put_stab(ostab);
175f9c1b 1345
23bcf634 1346 if (tca[TCA_RATE]) {
71bcb09a
SH
1347 /* NB: ignores errors from replace_estimator
1348 because change can't be undone. */
23bcf634
PM
1349 if (sch->flags & TCQ_F_MQROOT)
1350 goto out;
22e0f8b9
JF
1351 gen_replace_estimator(&sch->bstats,
1352 sch->cpu_bstats,
1353 &sch->rate_est,
edb09eb1
ED
1354 NULL,
1355 qdisc_root_sleeping_running(sch),
22e0f8b9 1356 tca[TCA_RATE]);
23bcf634
PM
1357 }
1358out:
1da177e4
LT
1359 return 0;
1360}
1361
cc7ec456
ED
1362struct check_loop_arg {
1363 struct qdisc_walker w;
1da177e4
LT
1364 struct Qdisc *p;
1365 int depth;
1366};
1367
5a7a5555
JHS
1368static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1369 struct qdisc_walker *w);
1da177e4
LT
1370
1371static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1372{
1373 struct check_loop_arg arg;
1374
1375 if (q->ops->cl_ops == NULL)
1376 return 0;
1377
1378 arg.w.stop = arg.w.skip = arg.w.count = 0;
1379 arg.w.fn = check_loop_fn;
1380 arg.depth = depth;
1381 arg.p = p;
1382 q->ops->cl_ops->walk(q, &arg.w);
1383 return arg.w.stop ? -ELOOP : 0;
1384}
1385
1386static int
1387check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1388{
1389 struct Qdisc *leaf;
20fea08b 1390 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4
LT
1391 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1392
1393 leaf = cops->leaf(q, cl);
1394 if (leaf) {
1395 if (leaf == arg->p || arg->depth > 7)
1396 return -ELOOP;
1397 return check_loop(leaf, arg->p, arg->depth + 1);
1398 }
1399 return 0;
1400}
1401
8b4c3cdd 1402const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
6f96c3c6 1403 [TCA_KIND] = { .type = NLA_STRING },
8b4c3cdd
DA
1404 [TCA_RATE] = { .type = NLA_BINARY,
1405 .len = sizeof(struct tc_estimator) },
1406 [TCA_STAB] = { .type = NLA_NESTED },
1407 [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG },
1408 [TCA_CHAIN] = { .type = NLA_U32 },
1409 [TCA_INGRESS_BLOCK] = { .type = NLA_U32 },
1410 [TCA_EGRESS_BLOCK] = { .type = NLA_U32 },
1411};
1412
e331473f
DC
1413/*
1414 * Delete/get qdisc.
1415 */
1416
c21ef3e3
DA
1417static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1418 struct netlink_ext_ack *extack)
1da177e4 1419{
3b1e0a65 1420 struct net *net = sock_net(skb->sk);
02ef22ca 1421 struct tcmsg *tcm = nlmsg_data(n);
1e90474c 1422 struct nlattr *tca[TCA_MAX + 1];
1da177e4 1423 struct net_device *dev;
de179c8c 1424 u32 clid;
1da177e4
LT
1425 struct Qdisc *q = NULL;
1426 struct Qdisc *p = NULL;
1427 int err;
1428
4e8bbb81 1429 if ((n->nlmsg_type != RTM_GETQDISC) &&
5f013c9b 1430 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef8
EB
1431 return -EPERM;
1432
8cb08174
JB
1433 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1434 rtm_tca_policy, extack);
1e90474c
PM
1435 if (err < 0)
1436 return err;
1437
de179c8c
H
1438 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1439 if (!dev)
1440 return -ENODEV;
1441
1442 clid = tcm->tcm_parent;
1da177e4
LT
1443 if (clid) {
1444 if (clid != TC_H_ROOT) {
1445 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
cc7ec456 1446 p = qdisc_lookup(dev, TC_H_MAJ(clid));
09215598
AA
1447 if (!p) {
1448 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1da177e4 1449 return -ENOENT;
09215598 1450 }
1da177e4 1451 q = qdisc_leaf(p, clid);
cc7ec456
ED
1452 } else if (dev_ingress_queue(dev)) {
1453 q = dev_ingress_queue(dev)->qdisc_sleeping;
10297b99 1454 }
1da177e4 1455 } else {
af356afa 1456 q = dev->qdisc;
1da177e4 1457 }
09215598
AA
1458 if (!q) {
1459 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1da177e4 1460 return -ENOENT;
09215598 1461 }
1da177e4 1462
09215598
AA
1463 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1464 NL_SET_ERR_MSG(extack, "Invalid handle");
1da177e4 1465 return -EINVAL;
09215598 1466 }
1da177e4 1467 } else {
cc7ec456 1468 q = qdisc_lookup(dev, tcm->tcm_handle);
09215598
AA
1469 if (!q) {
1470 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1da177e4 1471 return -ENOENT;
09215598 1472 }
1da177e4
LT
1473 }
1474
09215598
AA
1475 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1476 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1da177e4 1477 return -EINVAL;
09215598 1478 }
1da177e4
LT
1479
1480 if (n->nlmsg_type == RTM_DELQDISC) {
09215598
AA
1481 if (!clid) {
1482 NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1da177e4 1483 return -EINVAL;
09215598
AA
1484 }
1485 if (q->handle == 0) {
1486 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1da177e4 1487 return -ENOENT;
09215598
AA
1488 }
1489 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
cc7ec456 1490 if (err != 0)
1da177e4 1491 return err;
1da177e4 1492 } else {
7316ae88 1493 qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4
LT
1494 }
1495 return 0;
1496}
1497
1498/*
cc7ec456 1499 * Create/change qdisc.
1da177e4
LT
1500 */
1501
c21ef3e3
DA
1502static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1503 struct netlink_ext_ack *extack)
1da177e4 1504{
3b1e0a65 1505 struct net *net = sock_net(skb->sk);
1da177e4 1506 struct tcmsg *tcm;
1e90474c 1507 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1508 struct net_device *dev;
1509 u32 clid;
1510 struct Qdisc *q, *p;
1511 int err;
1512
5f013c9b 1513 if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef8
EB
1514 return -EPERM;
1515
1da177e4
LT
1516replay:
1517 /* Reinit, just in case something touches this. */
8cb08174
JB
1518 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1519 rtm_tca_policy, extack);
de179c8c
H
1520 if (err < 0)
1521 return err;
1522
02ef22ca 1523 tcm = nlmsg_data(n);
1da177e4
LT
1524 clid = tcm->tcm_parent;
1525 q = p = NULL;
1526
cc7ec456
ED
1527 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1528 if (!dev)
1da177e4
LT
1529 return -ENODEV;
1530
1e90474c 1531
1da177e4
LT
1532 if (clid) {
1533 if (clid != TC_H_ROOT) {
1534 if (clid != TC_H_INGRESS) {
cc7ec456 1535 p = qdisc_lookup(dev, TC_H_MAJ(clid));
09215598
AA
1536 if (!p) {
1537 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1da177e4 1538 return -ENOENT;
09215598 1539 }
1da177e4 1540 q = qdisc_leaf(p, clid);
cc7ec456
ED
1541 } else if (dev_ingress_queue_create(dev)) {
1542 q = dev_ingress_queue(dev)->qdisc_sleeping;
1da177e4
LT
1543 }
1544 } else {
af356afa 1545 q = dev->qdisc;
1da177e4
LT
1546 }
1547
1548 /* It may be default qdisc, ignore it */
1549 if (q && q->handle == 0)
1550 q = NULL;
1551
1552 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1553 if (tcm->tcm_handle) {
09215598
AA
1554 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1555 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1da177e4 1556 return -EEXIST;
09215598
AA
1557 }
1558 if (TC_H_MIN(tcm->tcm_handle)) {
1559 NL_SET_ERR_MSG(extack, "Invalid minor handle");
1da177e4 1560 return -EINVAL;
09215598 1561 }
cc7ec456 1562 q = qdisc_lookup(dev, tcm->tcm_handle);
8ec69574 1563 if (!q)
1da177e4 1564 goto create_n_graft;
09215598
AA
1565 if (n->nlmsg_flags & NLM_F_EXCL) {
1566 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1da177e4 1567 return -EEXIST;
09215598 1568 }
0ac4bd68 1569 if (tca[TCA_KIND] &&
09215598
AA
1570 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1571 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1da177e4 1572 return -EINVAL;
09215598 1573 }
1da177e4 1574 if (q == p ||
09215598
AA
1575 (p && check_loop(q, p, 0))) {
1576 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1da177e4 1577 return -ELOOP;
09215598 1578 }
551143d8 1579 qdisc_refcount_inc(q);
1da177e4
LT
1580 goto graft;
1581 } else {
cc7ec456 1582 if (!q)
1da177e4
LT
1583 goto create_n_graft;
1584
1585 /* This magic test requires explanation.
1586 *
1587 * We know, that some child q is already
1588 * attached to this parent and have choice:
1589 * either to change it or to create/graft new one.
1590 *
1591 * 1. We are allowed to create/graft only
1592 * if CREATE and REPLACE flags are set.
1593 *
1594 * 2. If EXCL is set, requestor wanted to say,
1595 * that qdisc tcm_handle is not expected
1596 * to exist, so that we choose create/graft too.
1597 *
1598 * 3. The last case is when no flags are set.
1599 * Alas, it is sort of hole in API, we
1600 * cannot decide what to do unambiguously.
1601 * For now we select create/graft, if
1602 * user gave KIND, which does not match existing.
1603 */
cc7ec456
ED
1604 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1605 (n->nlmsg_flags & NLM_F_REPLACE) &&
1606 ((n->nlmsg_flags & NLM_F_EXCL) ||
1e90474c
PM
1607 (tca[TCA_KIND] &&
1608 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1da177e4
LT
1609 goto create_n_graft;
1610 }
1611 }
1612 } else {
09215598
AA
1613 if (!tcm->tcm_handle) {
1614 NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1da177e4 1615 return -EINVAL;
09215598 1616 }
1da177e4
LT
1617 q = qdisc_lookup(dev, tcm->tcm_handle);
1618 }
1619
1620 /* Change qdisc parameters */
09215598
AA
1621 if (!q) {
1622 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1da177e4 1623 return -ENOENT;
09215598
AA
1624 }
1625 if (n->nlmsg_flags & NLM_F_EXCL) {
1626 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1da177e4 1627 return -EEXIST;
09215598
AA
1628 }
1629 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1630 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1da177e4 1631 return -EINVAL;
09215598
AA
1632 }
1633 err = qdisc_change(q, tca, extack);
1da177e4 1634 if (err == 0)
7316ae88 1635 qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4
LT
1636 return err;
1637
1638create_n_graft:
09215598
AA
1639 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1640 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1da177e4 1641 return -ENOENT;
09215598 1642 }
24824a09 1643 if (clid == TC_H_INGRESS) {
09215598 1644 if (dev_ingress_queue(dev)) {
24824a09
ED
1645 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1646 tcm->tcm_parent, tcm->tcm_parent,
09215598
AA
1647 tca, &err, extack);
1648 } else {
1649 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
24824a09 1650 err = -ENOENT;
09215598 1651 }
24824a09 1652 } else {
926e61b7 1653 struct netdev_queue *dev_queue;
6ec1c69a
DM
1654
1655 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
926e61b7
JP
1656 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1657 else if (p)
1658 dev_queue = p->dev_queue;
1659 else
1660 dev_queue = netdev_get_tx_queue(dev, 0);
6ec1c69a 1661
926e61b7 1662 q = qdisc_create(dev, dev_queue, p,
bb949fbd 1663 tcm->tcm_parent, tcm->tcm_handle,
09215598 1664 tca, &err, extack);
6ec1c69a 1665 }
1da177e4
LT
1666 if (q == NULL) {
1667 if (err == -EAGAIN)
1668 goto replay;
1669 return err;
1670 }
1671
1672graft:
09215598 1673 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
e5befbd9
IJ
1674 if (err) {
1675 if (q)
86bd446b 1676 qdisc_put(q);
e5befbd9 1677 return err;
1da177e4 1678 }
e5befbd9 1679
1da177e4
LT
1680 return 0;
1681}
1682
30723673
DM
1683static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1684 struct netlink_callback *cb,
49b49971
JK
1685 int *q_idx_p, int s_q_idx, bool recur,
1686 bool dump_invisible)
30723673
DM
1687{
1688 int ret = 0, q_idx = *q_idx_p;
1689 struct Qdisc *q;
59cc1f61 1690 int b;
30723673
DM
1691
1692 if (!root)
1693 return 0;
1694
1695 q = root;
1696 if (q_idx < s_q_idx) {
1697 q_idx++;
1698 } else {
49b49971 1699 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
15e47304 1700 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
5a7a5555
JHS
1701 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1702 RTM_NEWQDISC) <= 0)
30723673
DM
1703 goto done;
1704 q_idx++;
1705 }
69012ae4 1706
ea327469
JK
1707 /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1708 * itself has already been dumped.
1709 *
1710 * If we've already dumped the top-level (ingress) qdisc above and the global
1711 * qdisc hashtable, we don't want to hit it again
1712 */
1713 if (!qdisc_dev(root) || !recur)
69012ae4
JK
1714 goto out;
1715
59cc1f61 1716 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
30723673
DM
1717 if (q_idx < s_q_idx) {
1718 q_idx++;
1719 continue;
1720 }
49b49971 1721 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
15e47304 1722 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
5a7a5555
JHS
1723 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1724 RTM_NEWQDISC) <= 0)
30723673
DM
1725 goto done;
1726 q_idx++;
1727 }
1728
1729out:
1730 *q_idx_p = q_idx;
1731 return ret;
1732done:
1733 ret = -1;
1734 goto out;
1735}
1736
1da177e4
LT
1737static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1738{
3b1e0a65 1739 struct net *net = sock_net(skb->sk);
1da177e4
LT
1740 int idx, q_idx;
1741 int s_idx, s_q_idx;
1742 struct net_device *dev;
49b49971 1743 const struct nlmsghdr *nlh = cb->nlh;
49b49971
JK
1744 struct nlattr *tca[TCA_MAX + 1];
1745 int err;
1da177e4
LT
1746
1747 s_idx = cb->args[0];
1748 s_q_idx = q_idx = cb->args[1];
f1e9016d 1749
7562f876 1750 idx = 0;
15dc36eb 1751 ASSERT_RTNL();
49b49971 1752
8cb08174
JB
1753 err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1754 rtm_tca_policy, cb->extack);
49b49971
JK
1755 if (err < 0)
1756 return err;
1757
15dc36eb 1758 for_each_netdev(net, dev) {
30723673
DM
1759 struct netdev_queue *dev_queue;
1760
1da177e4 1761 if (idx < s_idx)
7562f876 1762 goto cont;
1da177e4
LT
1763 if (idx > s_idx)
1764 s_q_idx = 0;
1da177e4 1765 q_idx = 0;
30723673 1766
5a7a5555 1767 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx,
49b49971 1768 true, tca[TCA_DUMP_INVISIBLE]) < 0)
30723673
DM
1769 goto done;
1770
24824a09
ED
1771 dev_queue = dev_ingress_queue(dev);
1772 if (dev_queue &&
1773 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
49b49971
JK
1774 &q_idx, s_q_idx, false,
1775 tca[TCA_DUMP_INVISIBLE]) < 0)
30723673
DM
1776 goto done;
1777
7562f876
PE
1778cont:
1779 idx++;
1da177e4
LT
1780 }
1781
1782done:
1da177e4
LT
1783 cb->args[0] = idx;
1784 cb->args[1] = q_idx;
1785
1786 return skb->len;
1787}
1788
1789
1790
1791/************************************************
1792 * Traffic classes manipulation. *
1793 ************************************************/
1794
27d7f07c
WC
1795static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1796 unsigned long cl,
1797 u32 portid, u32 seq, u16 flags, int event)
1798{
1799 struct tcmsg *tcm;
1800 struct nlmsghdr *nlh;
1801 unsigned char *b = skb_tail_pointer(skb);
1802 struct gnet_dump d;
1803 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1da177e4 1804
27d7f07c
WC
1805 cond_resched();
1806 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1807 if (!nlh)
1808 goto out_nlmsg_trim;
1809 tcm = nlmsg_data(nlh);
1810 tcm->tcm_family = AF_UNSPEC;
1811 tcm->tcm__pad1 = 0;
1812 tcm->tcm__pad2 = 0;
1813 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1814 tcm->tcm_parent = q->handle;
1815 tcm->tcm_handle = q->handle;
1816 tcm->tcm_info = 0;
1817 if (nla_put_string(skb, TCA_KIND, q->ops->id))
1818 goto nla_put_failure;
1819 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1820 goto nla_put_failure;
1821
1822 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1823 NULL, &d, TCA_PAD) < 0)
1824 goto nla_put_failure;
1825
1826 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1827 goto nla_put_failure;
1828
1829 if (gnet_stats_finish_copy(&d) < 0)
1830 goto nla_put_failure;
1831
1832 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1833 return skb->len;
1834
1835out_nlmsg_trim:
1836nla_put_failure:
1837 nlmsg_trim(skb, b);
1838 return -1;
1839}
1840
1841static int tclass_notify(struct net *net, struct sk_buff *oskb,
1842 struct nlmsghdr *n, struct Qdisc *q,
1843 unsigned long cl, int event)
1844{
1845 struct sk_buff *skb;
1846 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
5b5f99b1 1847 int err = 0;
27d7f07c
WC
1848
1849 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1850 if (!skb)
1851 return -ENOBUFS;
1852
1853 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1854 kfree_skb(skb);
1855 return -EINVAL;
1856 }
1857
5b5f99b1
ZW
1858 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1859 n->nlmsg_flags & NLM_F_ECHO);
1860 if (err > 0)
1861 err = 0;
1862 return err;
27d7f07c 1863}
1da177e4 1864
14546ba1
WC
1865static int tclass_del_notify(struct net *net,
1866 const struct Qdisc_class_ops *cops,
1867 struct sk_buff *oskb, struct nlmsghdr *n,
1868 struct Qdisc *q, unsigned long cl)
1869{
1870 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1871 struct sk_buff *skb;
1872 int err = 0;
1873
1874 if (!cops->delete)
1875 return -EOPNOTSUPP;
1876
1877 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1878 if (!skb)
1879 return -ENOBUFS;
1880
1881 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1882 RTM_DELTCLASS) < 0) {
1883 kfree_skb(skb);
1884 return -EINVAL;
1885 }
1886
1887 err = cops->delete(q, cl);
1888 if (err) {
1889 kfree_skb(skb);
1890 return err;
1891 }
1892
5b5f99b1
ZW
1893 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1894 n->nlmsg_flags & NLM_F_ECHO);
1895 if (err > 0)
1896 err = 0;
1897 return err;
14546ba1
WC
1898}
1899
07d79fc7
CW
1900#ifdef CONFIG_NET_CLS
1901
1902struct tcf_bind_args {
1903 struct tcf_walker w;
2e24cd75 1904 unsigned long base;
07d79fc7 1905 unsigned long cl;
2e24cd75 1906 u32 classid;
07d79fc7
CW
1907};
1908
1909static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1910{
1911 struct tcf_bind_args *a = (void *)arg;
1912
1913 if (tp->ops->bind_class) {
74e3be60
JP
1914 struct Qdisc *q = tcf_block_q(tp->chain->block);
1915
1916 sch_tree_lock(q);
2e24cd75 1917 tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
74e3be60 1918 sch_tree_unlock(q);
07d79fc7
CW
1919 }
1920 return 0;
1921}
1922
760d228e
CW
1923struct tc_bind_class_args {
1924 struct qdisc_walker w;
1925 unsigned long new_cl;
1926 u32 portid;
1927 u32 clid;
1928};
1929
1930static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
1931 struct qdisc_walker *w)
07d79fc7 1932{
760d228e 1933 struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
07d79fc7
CW
1934 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1935 struct tcf_block *block;
1936 struct tcf_chain *chain;
07d79fc7 1937
cbaacc4e 1938 block = cops->tcf_block(q, cl, NULL);
07d79fc7 1939 if (!block)
760d228e 1940 return 0;
bbf73830
VB
1941 for (chain = tcf_get_next_chain(block, NULL);
1942 chain;
1943 chain = tcf_get_next_chain(block, chain)) {
07d79fc7
CW
1944 struct tcf_proto *tp;
1945
0fca55ed
VB
1946 for (tp = tcf_get_next_proto(chain, NULL);
1947 tp; tp = tcf_get_next_proto(chain, tp)) {
07d79fc7
CW
1948 struct tcf_bind_args arg = {};
1949
1950 arg.w.fn = tcf_node_bind;
760d228e 1951 arg.classid = a->clid;
2e24cd75 1952 arg.base = cl;
760d228e 1953 arg.cl = a->new_cl;
12db03b6 1954 tp->ops->walk(tp, &arg.w, true);
07d79fc7
CW
1955 }
1956 }
760d228e
CW
1957
1958 return 0;
1959}
1960
1961static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1962 unsigned long new_cl)
1963{
1964 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1965 struct tc_bind_class_args args = {};
1966
1967 if (!cops->tcf_block)
1968 return;
1969 args.portid = portid;
1970 args.clid = clid;
1971 args.new_cl = new_cl;
1972 args.w.fn = tc_bind_class_walker;
1973 q->ops->cl_ops->walk(q, &args.w);
07d79fc7
CW
1974}
1975
1976#else
1977
1978static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1979 unsigned long new_cl)
1980{
1981}
1982
1983#endif
1984
c21ef3e3
DA
1985static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1986 struct netlink_ext_ack *extack)
1da177e4 1987{
3b1e0a65 1988 struct net *net = sock_net(skb->sk);
02ef22ca 1989 struct tcmsg *tcm = nlmsg_data(n);
1e90474c 1990 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1991 struct net_device *dev;
1992 struct Qdisc *q = NULL;
20fea08b 1993 const struct Qdisc_class_ops *cops;
1da177e4
LT
1994 unsigned long cl = 0;
1995 unsigned long new_cl;
de179c8c
H
1996 u32 portid;
1997 u32 clid;
1998 u32 qid;
1da177e4
LT
1999 int err;
2000
4e8bbb81 2001 if ((n->nlmsg_type != RTM_GETTCLASS) &&
5f013c9b 2002 !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
dfc47ef8
EB
2003 return -EPERM;
2004
8cb08174
JB
2005 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2006 rtm_tca_policy, extack);
1e90474c
PM
2007 if (err < 0)
2008 return err;
2009
de179c8c
H
2010 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2011 if (!dev)
2012 return -ENODEV;
2013
1da177e4
LT
2014 /*
2015 parent == TC_H_UNSPEC - unspecified parent.
2016 parent == TC_H_ROOT - class is root, which has no parent.
2017 parent == X:0 - parent is root class.
2018 parent == X:Y - parent is a node in hierarchy.
2019 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
2020
2021 handle == 0:0 - generate handle from kernel pool.
2022 handle == 0:Y - class is X:Y, where X:0 is qdisc.
2023 handle == X:Y - clear.
2024 handle == X:0 - root class.
2025 */
2026
2027 /* Step 1. Determine qdisc handle X:0 */
2028
de179c8c
H
2029 portid = tcm->tcm_parent;
2030 clid = tcm->tcm_handle;
2031 qid = TC_H_MAJ(clid);
2032
15e47304
EB
2033 if (portid != TC_H_ROOT) {
2034 u32 qid1 = TC_H_MAJ(portid);
1da177e4
LT
2035
2036 if (qid && qid1) {
2037 /* If both majors are known, they must be identical. */
2038 if (qid != qid1)
2039 return -EINVAL;
2040 } else if (qid1) {
2041 qid = qid1;
2042 } else if (qid == 0)
af356afa 2043 qid = dev->qdisc->handle;
1da177e4
LT
2044
2045 /* Now qid is genuine qdisc handle consistent
cc7ec456
ED
2046 * both with parent and child.
2047 *
15e47304 2048 * TC_H_MAJ(portid) still may be unspecified, complete it now.
1da177e4 2049 */
15e47304
EB
2050 if (portid)
2051 portid = TC_H_MAKE(qid, portid);
1da177e4
LT
2052 } else {
2053 if (qid == 0)
af356afa 2054 qid = dev->qdisc->handle;
1da177e4
LT
2055 }
2056
2057 /* OK. Locate qdisc */
cc7ec456
ED
2058 q = qdisc_lookup(dev, qid);
2059 if (!q)
1da177e4
LT
2060 return -ENOENT;
2061
2062 /* An check that it supports classes */
2063 cops = q->ops->cl_ops;
2064 if (cops == NULL)
2065 return -EINVAL;
2066
2067 /* Now try to get class */
2068 if (clid == 0) {
15e47304 2069 if (portid == TC_H_ROOT)
1da177e4
LT
2070 clid = qid;
2071 } else
2072 clid = TC_H_MAKE(qid, clid);
2073
2074 if (clid)
143976ce 2075 cl = cops->find(q, clid);
1da177e4
LT
2076
2077 if (cl == 0) {
2078 err = -ENOENT;
cc7ec456
ED
2079 if (n->nlmsg_type != RTM_NEWTCLASS ||
2080 !(n->nlmsg_flags & NLM_F_CREATE))
1da177e4
LT
2081 goto out;
2082 } else {
2083 switch (n->nlmsg_type) {
10297b99 2084 case RTM_NEWTCLASS:
1da177e4 2085 err = -EEXIST;
cc7ec456 2086 if (n->nlmsg_flags & NLM_F_EXCL)
1da177e4
LT
2087 goto out;
2088 break;
2089 case RTM_DELTCLASS:
14546ba1 2090 err = tclass_del_notify(net, cops, skb, n, q, cl);
07d79fc7
CW
2091 /* Unbind the class with flilters with 0 */
2092 tc_bind_tclass(q, portid, clid, 0);
1da177e4
LT
2093 goto out;
2094 case RTM_GETTCLASS:
7316ae88 2095 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1da177e4
LT
2096 goto out;
2097 default:
2098 err = -EINVAL;
2099 goto out;
2100 }
2101 }
2102
d47a6b0e
JP
2103 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2104 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2105 return -EOPNOTSUPP;
2106 }
2107
1da177e4 2108 new_cl = cl;
de6d5cdf
PM
2109 err = -EOPNOTSUPP;
2110 if (cops->change)
793d81d6 2111 err = cops->change(q, clid, portid, tca, &new_cl, extack);
07d79fc7 2112 if (err == 0) {
7316ae88 2113 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
07d79fc7
CW
2114 /* We just created a new class; need to do reverse binding. */
2115 if (cl != new_cl)
2116 tc_bind_tclass(q, portid, clid, new_cl);
2117 }
1da177e4 2118out:
1da177e4
LT
2119 return err;
2120}
2121
cc7ec456
ED
2122struct qdisc_dump_args {
2123 struct qdisc_walker w;
2124 struct sk_buff *skb;
2125 struct netlink_callback *cb;
1da177e4
LT
2126};
2127
5a7a5555
JHS
2128static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2129 struct qdisc_walker *arg)
1da177e4
LT
2130{
2131 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2132
15e47304 2133 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
5a7a5555
JHS
2134 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2135 RTM_NEWTCLASS);
1da177e4
LT
2136}
2137
30723673
DM
2138static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2139 struct tcmsg *tcm, struct netlink_callback *cb,
2140 int *t_p, int s_t)
2141{
2142 struct qdisc_dump_args arg;
2143
49b49971 2144 if (tc_qdisc_dump_ignore(q, false) ||
30723673
DM
2145 *t_p < s_t || !q->ops->cl_ops ||
2146 (tcm->tcm_parent &&
2147 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2148 (*t_p)++;
2149 return 0;
2150 }
2151 if (*t_p > s_t)
2152 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2153 arg.w.fn = qdisc_class_dump;
2154 arg.skb = skb;
2155 arg.cb = cb;
2156 arg.w.stop = 0;
2157 arg.w.skip = cb->args[1];
2158 arg.w.count = 0;
2159 q->ops->cl_ops->walk(q, &arg.w);
2160 cb->args[1] = arg.w.count;
2161 if (arg.w.stop)
2162 return -1;
2163 (*t_p)++;
2164 return 0;
2165}
2166
2167static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2168 struct tcmsg *tcm, struct netlink_callback *cb,
2169 int *t_p, int s_t)
2170{
2171 struct Qdisc *q;
59cc1f61 2172 int b;
30723673
DM
2173
2174 if (!root)
2175 return 0;
2176
2177 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2178 return -1;
2179
69012ae4
JK
2180 if (!qdisc_dev(root))
2181 return 0;
2182
cb395b20
ED
2183 if (tcm->tcm_parent) {
2184 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
3c53ed8f
PS
2185 if (q && q != root &&
2186 tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
cb395b20
ED
2187 return -1;
2188 return 0;
2189 }
59cc1f61 2190 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
30723673
DM
2191 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2192 return -1;
2193 }
2194
2195 return 0;
2196}
2197
1da177e4
LT
2198static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2199{
02ef22ca 2200 struct tcmsg *tcm = nlmsg_data(cb->nlh);
3b1e0a65 2201 struct net *net = sock_net(skb->sk);
30723673 2202 struct netdev_queue *dev_queue;
1da177e4 2203 struct net_device *dev;
30723673 2204 int t, s_t;
1da177e4 2205
573ce260 2206 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1da177e4 2207 return 0;
cc7ec456
ED
2208 dev = dev_get_by_index(net, tcm->tcm_ifindex);
2209 if (!dev)
1da177e4
LT
2210 return 0;
2211
2212 s_t = cb->args[0];
2213 t = 0;
2214
af356afa 2215 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
30723673
DM
2216 goto done;
2217
24824a09
ED
2218 dev_queue = dev_ingress_queue(dev);
2219 if (dev_queue &&
2220 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
2221 &t, s_t) < 0)
30723673 2222 goto done;
1da177e4 2223
30723673 2224done:
1da177e4
LT
2225 cb->args[0] = t;
2226
2227 dev_put(dev);
2228 return skb->len;
2229}
2230
1da177e4
LT
2231#ifdef CONFIG_PROC_FS
2232static int psched_show(struct seq_file *seq, void *v)
2233{
2234 seq_printf(seq, "%08x %08x %08x %08x\n",
ca44d6e6 2235 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
514bca32 2236 1000000,
1e317688 2237 (u32)NSEC_PER_SEC / hrtimer_resolution);
1da177e4
LT
2238
2239 return 0;
2240}
2241
7316ae88
TG
2242static int __net_init psched_net_init(struct net *net)
2243{
2244 struct proc_dir_entry *e;
2245
3f3942ac 2246 e = proc_create_single("psched", 0, net->proc_net, psched_show);
7316ae88
TG
2247 if (e == NULL)
2248 return -ENOMEM;
2249
2250 return 0;
2251}
2252
2253static void __net_exit psched_net_exit(struct net *net)
2254{
ece31ffd 2255 remove_proc_entry("psched", net->proc_net);
7316ae88
TG
2256}
2257#else
2258static int __net_init psched_net_init(struct net *net)
2259{
2260 return 0;
2261}
2262
2263static void __net_exit psched_net_exit(struct net *net)
2264{
2265}
1da177e4
LT
2266#endif
2267
7316ae88
TG
2268static struct pernet_operations psched_net_ops = {
2269 .init = psched_net_init,
2270 .exit = psched_net_exit,
2271};
2272
1da177e4
LT
2273static int __init pktsched_init(void)
2274{
7316ae88
TG
2275 int err;
2276
2277 err = register_pernet_subsys(&psched_net_ops);
2278 if (err) {
cc7ec456 2279 pr_err("pktsched_init: "
7316ae88
TG
2280 "cannot initialize per netns operations\n");
2281 return err;
2282 }
2283
6da7c8fc 2284 register_qdisc(&pfifo_fast_ops);
1da177e4
LT
2285 register_qdisc(&pfifo_qdisc_ops);
2286 register_qdisc(&bfifo_qdisc_ops);
57dbb2d8 2287 register_qdisc(&pfifo_head_drop_qdisc_ops);
6ec1c69a 2288 register_qdisc(&mq_qdisc_ops);
d66d6c31 2289 register_qdisc(&noqueue_qdisc_ops);
1da177e4 2290
b97bac64
FW
2291 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2292 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
5a7a5555 2293 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
b97bac64
FW
2294 0);
2295 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2296 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
5a7a5555 2297 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
b97bac64 2298 0);
be577ddc 2299
1da177e4
LT
2300 return 0;
2301}
2302
2303subsys_initcall(pktsched_init);