Merge tag 'pwm/for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/thierry...
[linux-block.git] / net / sched / sch_api.c
CommitLineData
2874c5fd 1// SPDX-License-Identifier: GPL-2.0-or-later
1da177e4
LT
2/*
3 * net/sched/sch_api.c Packet scheduler API.
4 *
1da177e4
LT
5 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6 *
7 * Fixes:
8 *
9 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
10 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
11 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
12 */
13
1da177e4
LT
14#include <linux/module.h>
15#include <linux/types.h>
16#include <linux/kernel.h>
1da177e4 17#include <linux/string.h>
1da177e4 18#include <linux/errno.h>
1da177e4 19#include <linux/skbuff.h>
1da177e4
LT
20#include <linux/init.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
23#include <linux/kmod.h>
24#include <linux/list.h>
4179477f 25#include <linux/hrtimer.h>
5a0e3ad6 26#include <linux/slab.h>
59cc1f61 27#include <linux/hashtable.h>
1da177e4 28
457c4cbc 29#include <net/net_namespace.h>
b854272b 30#include <net/sock.h>
dc5fc579 31#include <net/netlink.h>
1da177e4 32#include <net/pkt_sched.h>
07d79fc7 33#include <net/pkt_cls.h>
7f0e8102 34#include <net/tc_wrapper.h>
1da177e4 35
f5a7833e
CW
36#include <trace/events/qdisc.h>
37
1da177e4
LT
38/*
39
40 Short review.
41 -------------
42
43 This file consists of two interrelated parts:
44
45 1. queueing disciplines manager frontend.
46 2. traffic classes manager frontend.
47
48 Generally, queueing discipline ("qdisc") is a black box,
49 which is able to enqueue packets and to dequeue them (when
50 device is ready to send something) in order and at times
51 determined by algorithm hidden in it.
52
53 qdiscs are divided into two categories:
54 - "queues", which have no internal structure visible from outside.
55 - "schedulers", which split all the packets to "traffic classes",
56 using "packet classifiers" (look at cls_api.c)
57
58 In turn, classes may have child qdiscs (as rule, queues)
59 attached to them etc. etc. etc.
60
61 The goal of the routines in this file is to translate
62 information supplied by the user in the form of handles
63 into a form more intelligible to the kernel, to perform sanity
64 checks and the part of the work which is common to all qdiscs,
65 and to provide rtnetlink notifications.
66
67 All real intelligent work is done inside qdisc modules.
68
69
70
71 Every discipline has two major routines: enqueue and dequeue.
72
73 ---dequeue
74
75 dequeue usually returns a skb to send. It is allowed to return NULL,
76 but it does not mean that queue is empty, it just means that
77 discipline does not want to send anything this time.
78 Queue is really empty if q->q.qlen == 0.
79 For complicated disciplines with multiple queues q->q is not
80 a real packet queue; however, q->q.qlen must still be valid.
81
82 ---enqueue
83
84 enqueue returns 0 if the packet was enqueued successfully.
85 If a packet (this one or another one) was dropped, it returns
86 a non-zero error code.
87 NET_XMIT_DROP - this packet was dropped
88 Expected action: do not back off, but wait until the queue clears.
89 NET_XMIT_CN - this packet was probably enqueued, but another one was dropped.
90 Expected action: back off or ignore
1da177e4
LT
91
92 Auxiliary routines:
93
99c0db26
JP
94 ---peek
95
96 like dequeue but without removing a packet from the queue
97
1da177e4
LT
98 ---reset
99
100 returns qdisc to initial state: purge all buffers, clear all
101 timers, counters (except for statistics) etc.
102
103 ---init
104
105 initializes newly created qdisc.
106
107 ---destroy
108
109 destroys resources allocated by init and during lifetime of qdisc.
110
111 ---change
112
113 changes qdisc parameters.
114 */
115
116/* Protects list of registered TC modules. It is pure SMP lock. */
117static DEFINE_RWLOCK(qdisc_mod_lock);
118
119
120/************************************************
121 * Queueing disciplines manipulation. *
122 ************************************************/
123
124
125/* The list of all installed queueing disciplines. */
126
127static struct Qdisc_ops *qdisc_base;
128
21eb2189 129/* Register/unregister queueing discipline */
1da177e4
LT
130
131int register_qdisc(struct Qdisc_ops *qops)
132{
133 struct Qdisc_ops *q, **qp;
134 int rc = -EEXIST;
135
136 write_lock(&qdisc_mod_lock);
137 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
138 if (!strcmp(qops->id, q->id))
139 goto out;
140
141 if (qops->enqueue == NULL)
142 qops->enqueue = noop_qdisc_ops.enqueue;
99c0db26 143 if (qops->peek == NULL) {
68fd26b5 144 if (qops->dequeue == NULL)
99c0db26 145 qops->peek = noop_qdisc_ops.peek;
68fd26b5
JP
146 else
147 goto out_einval;
99c0db26 148 }
1da177e4
LT
149 if (qops->dequeue == NULL)
150 qops->dequeue = noop_qdisc_ops.dequeue;
151
68fd26b5
JP
152 if (qops->cl_ops) {
153 const struct Qdisc_class_ops *cops = qops->cl_ops;
154
143976ce 155 if (!(cops->find && cops->walk && cops->leaf))
68fd26b5
JP
156 goto out_einval;
157
6529eaba 158 if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf))
68fd26b5
JP
159 goto out_einval;
160 }
161
1da177e4
LT
162 qops->next = NULL;
163 *qp = qops;
164 rc = 0;
165out:
166 write_unlock(&qdisc_mod_lock);
167 return rc;
68fd26b5
JP
168
169out_einval:
170 rc = -EINVAL;
171 goto out;
1da177e4 172}
62e3ba1b 173EXPORT_SYMBOL(register_qdisc);
1da177e4 174
52327d2e 175void unregister_qdisc(struct Qdisc_ops *qops)
1da177e4
LT
176{
177 struct Qdisc_ops *q, **qp;
178 int err = -ENOENT;
179
180 write_lock(&qdisc_mod_lock);
cc7ec456 181 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
1da177e4
LT
182 if (q == qops)
183 break;
184 if (q) {
185 *qp = q->next;
186 q->next = NULL;
187 err = 0;
188 }
189 write_unlock(&qdisc_mod_lock);
52327d2e
ZS
190
191 WARN(err, "unregister qdisc(%s) failed\n", qops->id);
1da177e4 192}
62e3ba1b 193EXPORT_SYMBOL(unregister_qdisc);
1da177e4 194
6da7c8fc 195/* Get default qdisc if not otherwise specified */
196void qdisc_get_default(char *name, size_t len)
197{
198 read_lock(&qdisc_mod_lock);
92f24c6f 199 strscpy(name, default_qdisc_ops->id, len);
6da7c8fc 200 read_unlock(&qdisc_mod_lock);
201}
202
203static struct Qdisc_ops *qdisc_lookup_default(const char *name)
204{
205 struct Qdisc_ops *q = NULL;
206
207 for (q = qdisc_base; q; q = q->next) {
208 if (!strcmp(name, q->id)) {
209 if (!try_module_get(q->owner))
210 q = NULL;
211 break;
212 }
213 }
214
215 return q;
216}
217
218/* Set new default qdisc to use */
219int qdisc_set_default(const char *name)
220{
221 const struct Qdisc_ops *ops;
222
223 if (!capable(CAP_NET_ADMIN))
224 return -EPERM;
225
226 write_lock(&qdisc_mod_lock);
227 ops = qdisc_lookup_default(name);
228 if (!ops) {
229 /* Not found, drop lock and try to load module */
230 write_unlock(&qdisc_mod_lock);
231 request_module("sch_%s", name);
232 write_lock(&qdisc_mod_lock);
233
234 ops = qdisc_lookup_default(name);
235 }
236
237 if (ops) {
238 /* Set new default */
239 module_put(default_qdisc_ops->owner);
240 default_qdisc_ops = ops;
241 }
242 write_unlock(&qdisc_mod_lock);
243
244 return ops ? 0 : -ENOENT;
245}
246
#ifdef CONFIG_NET_SCH_DEFAULT
/*
 * Late initcall: install the default qdisc named by the kernel
 * configuration (CONFIG_DEFAULT_NET_SCH).
 */
static int __init sch_default_qdisc(void)
{
	int err;

	err = qdisc_set_default(CONFIG_DEFAULT_NET_SCH);
	return err;
}
late_initcall(sch_default_qdisc);
#endif
255
1da177e4 256/* We know handle. Find qdisc among all qdisc's attached to device
4eaf3b84
ED
257 * (root qdisc, all its children, children of children etc.)
258 * Note: caller either uses rtnl or rcu_read_lock()
1da177e4
LT
259 */
260
6113b748 261static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
8123b421
DM
262{
263 struct Qdisc *q;
264
69012ae4
JK
265 if (!qdisc_dev(root))
266 return (root->handle == handle ? root : NULL);
267
8123b421
DM
268 if (!(root->flags & TCQ_F_BUILTIN) &&
269 root->handle == handle)
270 return root;
271
a8b7b2d0
JP
272 hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle,
273 lockdep_rtnl_is_held()) {
8123b421
DM
274 if (q->handle == handle)
275 return q;
276 }
277 return NULL;
278}
279
49b49971 280void qdisc_hash_add(struct Qdisc *q, bool invisible)
f6e0b239 281{
37314363 282 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
4eaf3b84 283 ASSERT_RTNL();
59cc1f61 284 hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
49b49971
JK
285 if (invisible)
286 q->flags |= TCQ_F_INVISIBLE;
37314363 287 }
f6e0b239 288}
59cc1f61 289EXPORT_SYMBOL(qdisc_hash_add);
f6e0b239 290
59cc1f61 291void qdisc_hash_del(struct Qdisc *q)
f6e0b239 292{
4eaf3b84
ED
293 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
294 ASSERT_RTNL();
59cc1f61 295 hash_del_rcu(&q->hash);
4eaf3b84 296 }
f6e0b239 297}
59cc1f61 298EXPORT_SYMBOL(qdisc_hash_del);
f6e0b239 299
ead81cc5 300struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
1da177e4 301{
f6e0b239
JP
302 struct Qdisc *q;
303
50317fce
CW
304 if (!handle)
305 return NULL;
5891cd5e 306 q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle);
af356afa
PM
307 if (q)
308 goto out;
f6e0b239 309
24824a09
ED
310 if (dev_ingress_queue(dev))
311 q = qdisc_match_from_root(
312 dev_ingress_queue(dev)->qdisc_sleeping,
313 handle);
f6486d40 314out:
f6e0b239 315 return q;
1da177e4
LT
316}
317
3a7d0d07
VB
318struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle)
319{
320 struct netdev_queue *nq;
321 struct Qdisc *q;
322
323 if (!handle)
324 return NULL;
5891cd5e 325 q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle);
3a7d0d07
VB
326 if (q)
327 goto out;
328
329 nq = dev_ingress_queue_rcu(dev);
330 if (nq)
331 q = qdisc_match_from_root(nq->qdisc_sleeping, handle);
332out:
333 return q;
334}
335
1da177e4
LT
336static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
337{
338 unsigned long cl;
20fea08b 339 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
1da177e4
LT
340
341 if (cops == NULL)
342 return NULL;
143976ce 343 cl = cops->find(p, classid);
1da177e4
LT
344
345 if (cl == 0)
346 return NULL;
2561f972 347 return cops->leaf(p, cl);
1da177e4
LT
348}
349
350/* Find queueing discipline by name */
351
1e90474c 352static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
1da177e4
LT
353{
354 struct Qdisc_ops *q = NULL;
355
356 if (kind) {
357 read_lock(&qdisc_mod_lock);
358 for (q = qdisc_base; q; q = q->next) {
1e90474c 359 if (nla_strcmp(kind, q->id) == 0) {
1da177e4
LT
360 if (!try_module_get(q->owner))
361 q = NULL;
362 break;
363 }
364 }
365 read_unlock(&qdisc_mod_lock);
366 }
367 return q;
368}
369
8a8e3d84
JDB
370/* The linklayer setting were not transferred from iproute2, in older
371 * versions, and the rate tables lookup systems have been dropped in
372 * the kernel. To keep backward compatible with older iproute2 tc
373 * utils, we detect the linklayer setting by detecting if the rate
374 * table were modified.
375 *
376 * For linklayer ATM table entries, the rate table will be aligned to
377 * 48 bytes, thus some table entries will contain the same value. The
378 * mpu (min packet unit) is also encoded into the old rate table, thus
379 * starting from the mpu, we find low and high table entries for
380 * mapping this cell. If these entries contain the same value, when
381 * the rate tables have been modified for linklayer ATM.
382 *
383 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
384 * and then roundup to the next cell, calc the table entry one below,
385 * and compare.
386 */
387static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
388{
389 int low = roundup(r->mpu, 48);
390 int high = roundup(low+1, 48);
391 int cell_low = low >> r->cell_log;
392 int cell_high = (high >> r->cell_log) - 1;
393
394 /* rtab is too inaccurate at rates > 100Mbit/s */
395 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
396 pr_debug("TC linklayer: Giving up ATM detection\n");
397 return TC_LINKLAYER_ETHERNET;
398 }
399
400 if ((cell_high > cell_low) && (cell_high < 256)
401 && (rtab[cell_low] == rtab[cell_high])) {
402 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
403 cell_low, cell_high, rtab[cell_high]);
404 return TC_LINKLAYER_ATM;
405 }
406 return TC_LINKLAYER_ETHERNET;
407}
408
1da177e4
LT
409static struct qdisc_rate_table *qdisc_rtab_list;
410
5a7a5555 411struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
e9bc3fa2
AA
412 struct nlattr *tab,
413 struct netlink_ext_ack *extack)
1da177e4
LT
414{
415 struct qdisc_rate_table *rtab;
416
e4bedf48
ED
417 if (tab == NULL || r->rate == 0 ||
418 r->cell_log == 0 || r->cell_log >= 32 ||
e9bc3fa2
AA
419 nla_len(tab) != TC_RTAB_SIZE) {
420 NL_SET_ERR_MSG(extack, "Invalid rate table parameters for searching");
40edeff6 421 return NULL;
e9bc3fa2 422 }
40edeff6 423
1da177e4 424 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
40edeff6
ED
425 if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
426 !memcmp(&rtab->data, nla_data(tab), 1024)) {
1da177e4
LT
427 rtab->refcnt++;
428 return rtab;
429 }
430 }
431
1da177e4
LT
432 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
433 if (rtab) {
434 rtab->rate = *r;
435 rtab->refcnt = 1;
1e90474c 436 memcpy(rtab->data, nla_data(tab), 1024);
8a8e3d84
JDB
437 if (r->linklayer == TC_LINKLAYER_UNAWARE)
438 r->linklayer = __detect_linklayer(r, rtab->data);
1da177e4
LT
439 rtab->next = qdisc_rtab_list;
440 qdisc_rtab_list = rtab;
e9bc3fa2
AA
441 } else {
442 NL_SET_ERR_MSG(extack, "Failed to allocate new qdisc rate table");
1da177e4
LT
443 }
444 return rtab;
445}
62e3ba1b 446EXPORT_SYMBOL(qdisc_get_rtab);
1da177e4
LT
447
448void qdisc_put_rtab(struct qdisc_rate_table *tab)
449{
450 struct qdisc_rate_table *rtab, **rtabp;
451
452 if (!tab || --tab->refcnt)
453 return;
454
cc7ec456
ED
455 for (rtabp = &qdisc_rtab_list;
456 (rtab = *rtabp) != NULL;
457 rtabp = &rtab->next) {
1da177e4
LT
458 if (rtab == tab) {
459 *rtabp = rtab->next;
460 kfree(rtab);
461 return;
462 }
463 }
464}
62e3ba1b 465EXPORT_SYMBOL(qdisc_put_rtab);
1da177e4 466
175f9c1b 467static LIST_HEAD(qdisc_stab_list);
175f9c1b
JK
468
469static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
470 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
471 [TCA_STAB_DATA] = { .type = NLA_BINARY },
472};
473
09215598
AA
474static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
475 struct netlink_ext_ack *extack)
175f9c1b
JK
476{
477 struct nlattr *tb[TCA_STAB_MAX + 1];
478 struct qdisc_size_table *stab;
479 struct tc_sizespec *s;
480 unsigned int tsize = 0;
481 u16 *tab = NULL;
482 int err;
483
8cb08174
JB
484 err = nla_parse_nested_deprecated(tb, TCA_STAB_MAX, opt, stab_policy,
485 extack);
175f9c1b
JK
486 if (err < 0)
487 return ERR_PTR(err);
09215598
AA
488 if (!tb[TCA_STAB_BASE]) {
489 NL_SET_ERR_MSG(extack, "Size table base attribute is missing");
175f9c1b 490 return ERR_PTR(-EINVAL);
09215598 491 }
175f9c1b
JK
492
493 s = nla_data(tb[TCA_STAB_BASE]);
494
495 if (s->tsize > 0) {
09215598
AA
496 if (!tb[TCA_STAB_DATA]) {
497 NL_SET_ERR_MSG(extack, "Size table data attribute is missing");
175f9c1b 498 return ERR_PTR(-EINVAL);
09215598 499 }
175f9c1b
JK
500 tab = nla_data(tb[TCA_STAB_DATA]);
501 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
502 }
503
09215598
AA
504 if (tsize != s->tsize || (!tab && tsize > 0)) {
505 NL_SET_ERR_MSG(extack, "Invalid size of size table");
175f9c1b 506 return ERR_PTR(-EINVAL);
09215598 507 }
175f9c1b 508
175f9c1b
JK
509 list_for_each_entry(stab, &qdisc_stab_list, list) {
510 if (memcmp(&stab->szopts, s, sizeof(*s)))
511 continue;
69508d43
GS
512 if (tsize > 0 &&
513 memcmp(stab->data, tab, flex_array_size(stab, data, tsize)))
175f9c1b
JK
514 continue;
515 stab->refcnt++;
175f9c1b
JK
516 return stab;
517 }
518
b193e15a 519 if (s->size_log > STAB_SIZE_LOG_MAX ||
520 s->cell_log > STAB_SIZE_LOG_MAX) {
521 NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table");
522 return ERR_PTR(-EINVAL);
523 }
524
69508d43 525 stab = kmalloc(struct_size(stab, data, tsize), GFP_KERNEL);
175f9c1b
JK
526 if (!stab)
527 return ERR_PTR(-ENOMEM);
528
529 stab->refcnt = 1;
530 stab->szopts = *s;
531 if (tsize > 0)
69508d43 532 memcpy(stab->data, tab, flex_array_size(stab, data, tsize));
175f9c1b 533
175f9c1b 534 list_add_tail(&stab->list, &qdisc_stab_list);
175f9c1b
JK
535
536 return stab;
537}
538
539void qdisc_put_stab(struct qdisc_size_table *tab)
540{
541 if (!tab)
542 return;
543
175f9c1b
JK
544 if (--tab->refcnt == 0) {
545 list_del(&tab->list);
6e07902f 546 kfree_rcu(tab, rcu);
175f9c1b 547 }
175f9c1b
JK
548}
549EXPORT_SYMBOL(qdisc_put_stab);
550
551static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
552{
553 struct nlattr *nest;
554
ae0be8de 555 nest = nla_nest_start_noflag(skb, TCA_STAB);
3aa4614d
PM
556 if (nest == NULL)
557 goto nla_put_failure;
1b34ec43
DM
558 if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
559 goto nla_put_failure;
175f9c1b
JK
560 nla_nest_end(skb, nest);
561
562 return skb->len;
563
564nla_put_failure:
565 return -1;
566}
567
5a7a5555
JHS
568void __qdisc_calculate_pkt_len(struct sk_buff *skb,
569 const struct qdisc_size_table *stab)
175f9c1b
JK
570{
571 int pkt_len, slot;
572
573 pkt_len = skb->len + stab->szopts.overhead;
574 if (unlikely(!stab->szopts.tsize))
575 goto out;
576
577 slot = pkt_len + stab->szopts.cell_align;
578 if (unlikely(slot < 0))
579 slot = 0;
580
581 slot >>= stab->szopts.cell_log;
582 if (likely(slot < stab->szopts.tsize))
583 pkt_len = stab->data[slot];
584 else
585 pkt_len = stab->data[stab->szopts.tsize - 1] *
586 (slot / stab->szopts.tsize) +
587 stab->data[slot % stab->szopts.tsize];
588
589 pkt_len <<= stab->szopts.size_log;
590out:
591 if (unlikely(pkt_len < 1))
592 pkt_len = 1;
593 qdisc_skb_cb(skb)->pkt_len = pkt_len;
594}
a2da570d 595EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
175f9c1b 596
6e765a00 597void qdisc_warn_nonwc(const char *txt, struct Qdisc *qdisc)
b00355db
JP
598{
599 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
cc7ec456
ED
600 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
601 txt, qdisc->ops->id, qdisc->handle >> 16);
b00355db
JP
602 qdisc->flags |= TCQ_F_WARN_NONWC;
603 }
604}
605EXPORT_SYMBOL(qdisc_warn_nonwc);
606
4179477f
PM
607static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
608{
609 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
2fbd3da3 610 timer);
4179477f 611
1e203c1a 612 rcu_read_lock();
8608db03 613 __netif_schedule(qdisc_root(wd->qdisc));
1e203c1a 614 rcu_read_unlock();
1936502d 615
4179477f
PM
616 return HRTIMER_NORESTART;
617}
618
860b642b
VCG
619void qdisc_watchdog_init_clockid(struct qdisc_watchdog *wd, struct Qdisc *qdisc,
620 clockid_t clockid)
4179477f 621{
860b642b 622 hrtimer_init(&wd->timer, clockid, HRTIMER_MODE_ABS_PINNED);
2fbd3da3 623 wd->timer.function = qdisc_watchdog;
4179477f
PM
624 wd->qdisc = qdisc;
625}
860b642b
VCG
626EXPORT_SYMBOL(qdisc_watchdog_init_clockid);
627
628void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
629{
630 qdisc_watchdog_init_clockid(wd, qdisc, CLOCK_MONOTONIC);
631}
4179477f
PM
632EXPORT_SYMBOL(qdisc_watchdog_init);
633
efe074c2
ED
634void qdisc_watchdog_schedule_range_ns(struct qdisc_watchdog *wd, u64 expires,
635 u64 delta_ns)
4179477f 636{
2540e051
JP
637 if (test_bit(__QDISC_STATE_DEACTIVATED,
638 &qdisc_root_sleeping(wd->qdisc)->state))
639 return;
640
b88948fb
ED
641 if (hrtimer_is_queued(&wd->timer)) {
642 /* If timer is already set in [expires, expires + delta_ns],
643 * do not reprogram it.
644 */
645 if (wd->last_expires - expires <= delta_ns)
646 return;
647 }
a9efad8b
ED
648
649 wd->last_expires = expires;
efe074c2
ED
650 hrtimer_start_range_ns(&wd->timer,
651 ns_to_ktime(expires),
652 delta_ns,
653 HRTIMER_MODE_ABS_PINNED);
4179477f 654}
efe074c2 655EXPORT_SYMBOL(qdisc_watchdog_schedule_range_ns);
4179477f
PM
656
657void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
658{
2fbd3da3 659 hrtimer_cancel(&wd->timer);
4179477f
PM
660}
661EXPORT_SYMBOL(qdisc_watchdog_cancel);
1da177e4 662
a94f779f 663static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
6fe1c7a5 664{
6fe1c7a5 665 struct hlist_head *h;
9695fe6f 666 unsigned int i;
6fe1c7a5 667
9695fe6f 668 h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL);
6fe1c7a5
PM
669
670 if (h != NULL) {
671 for (i = 0; i < n; i++)
672 INIT_HLIST_HEAD(&h[i]);
673 }
674 return h;
675}
676
6fe1c7a5
PM
677void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
678{
679 struct Qdisc_class_common *cl;
b67bfe0d 680 struct hlist_node *next;
6fe1c7a5
PM
681 struct hlist_head *nhash, *ohash;
682 unsigned int nsize, nmask, osize;
683 unsigned int i, h;
684
685 /* Rehash when load factor exceeds 0.75 */
686 if (clhash->hashelems * 4 <= clhash->hashsize * 3)
687 return;
688 nsize = clhash->hashsize * 2;
689 nmask = nsize - 1;
690 nhash = qdisc_class_hash_alloc(nsize);
691 if (nhash == NULL)
692 return;
693
694 ohash = clhash->hash;
695 osize = clhash->hashsize;
696
697 sch_tree_lock(sch);
698 for (i = 0; i < osize; i++) {
b67bfe0d 699 hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
6fe1c7a5
PM
700 h = qdisc_class_hash(cl->classid, nmask);
701 hlist_add_head(&cl->hnode, &nhash[h]);
702 }
703 }
704 clhash->hash = nhash;
705 clhash->hashsize = nsize;
706 clhash->hashmask = nmask;
707 sch_tree_unlock(sch);
708
9695fe6f 709 kvfree(ohash);
6fe1c7a5
PM
710}
711EXPORT_SYMBOL(qdisc_class_hash_grow);
712
713int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
714{
715 unsigned int size = 4;
716
717 clhash->hash = qdisc_class_hash_alloc(size);
ac8ef4ab 718 if (!clhash->hash)
6fe1c7a5
PM
719 return -ENOMEM;
720 clhash->hashsize = size;
721 clhash->hashmask = size - 1;
722 clhash->hashelems = 0;
723 return 0;
724}
725EXPORT_SYMBOL(qdisc_class_hash_init);
726
727void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
728{
9695fe6f 729 kvfree(clhash->hash);
6fe1c7a5
PM
730}
731EXPORT_SYMBOL(qdisc_class_hash_destroy);
732
733void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
734 struct Qdisc_class_common *cl)
735{
736 unsigned int h;
737
738 INIT_HLIST_NODE(&cl->hnode);
739 h = qdisc_class_hash(cl->classid, clhash->hashmask);
740 hlist_add_head(&cl->hnode, &clhash->hash[h]);
741 clhash->hashelems++;
742}
743EXPORT_SYMBOL(qdisc_class_hash_insert);
744
745void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
746 struct Qdisc_class_common *cl)
747{
748 hlist_del(&cl->hnode);
749 clhash->hashelems--;
750}
751EXPORT_SYMBOL(qdisc_class_hash_remove);
752
fa0f5aa7
ED
753/* Allocate an unique handle from space managed by kernel
754 * Possible range is [8000-FFFF]:0000 (0x8000 values)
755 */
1da177e4
LT
756static u32 qdisc_alloc_handle(struct net_device *dev)
757{
fa0f5aa7 758 int i = 0x8000;
1da177e4
LT
759 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
760
761 do {
762 autohandle += TC_H_MAKE(0x10000U, 0);
763 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
764 autohandle = TC_H_MAKE(0x80000000U, 0);
fa0f5aa7
ED
765 if (!qdisc_lookup(dev, autohandle))
766 return autohandle;
767 cond_resched();
768 } while (--i > 0);
1da177e4 769
fa0f5aa7 770 return 0;
1da177e4
LT
771}
772
5f2939d9 773void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
43effa1e 774{
fd5ac14a 775 bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
20fea08b 776 const struct Qdisc_class_ops *cops;
43effa1e
PM
777 unsigned long cl;
778 u32 parentid;
95946658 779 bool notify;
2c8c8e6f 780 int drops;
43effa1e 781
2ccccf5f 782 if (n == 0 && len == 0)
43effa1e 783 return;
2c8c8e6f 784 drops = max_t(int, n, 0);
4eaf3b84 785 rcu_read_lock();
43effa1e 786 while ((parentid = sch->parent)) {
066a3b5b 787 if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
4eaf3b84 788 break;
066a3b5b 789
4eaf3b84
ED
790 if (sch->flags & TCQ_F_NOPARENT)
791 break;
95946658
KK
792 /* Notify parent qdisc only if child qdisc becomes empty.
793 *
794 * If child was empty even before update then backlog
795 * counter is screwed and we skip notification because
796 * parent class is already passive.
fd5ac14a
NF
797 *
798 * If the original child was offloaded then it is allowed
799 * to be seem as empty, so the parent is notified anyway.
95946658 800 */
fd5ac14a
NF
801 notify = !sch->q.qlen && !WARN_ON_ONCE(!n &&
802 !qdisc_is_offloaded);
4eaf3b84 803 /* TODO: perform the search on a per txq basis */
5ce2d488 804 sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
ffc8fefa 805 if (sch == NULL) {
4eaf3b84
ED
806 WARN_ON_ONCE(parentid != TC_H_ROOT);
807 break;
ffc8fefa 808 }
43effa1e 809 cops = sch->ops->cl_ops;
95946658 810 if (notify && cops->qlen_notify) {
143976ce 811 cl = cops->find(sch, parentid);
43effa1e 812 cops->qlen_notify(sch, cl);
43effa1e
PM
813 }
814 sch->q.qlen -= n;
2ccccf5f 815 sch->qstats.backlog -= len;
25331d6c 816 __qdisc_qstats_drop(sch, drops);
43effa1e 817 }
4eaf3b84 818 rcu_read_unlock();
43effa1e 819}
2ccccf5f 820EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
1da177e4 821
b592843c
JK
822int qdisc_offload_dump_helper(struct Qdisc *sch, enum tc_setup_type type,
823 void *type_data)
824{
825 struct net_device *dev = qdisc_dev(sch);
826 int err;
827
828 sch->flags &= ~TCQ_F_OFFLOADED;
829 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
830 return 0;
831
832 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
833 if (err == -EOPNOTSUPP)
834 return 0;
835
836 if (!err)
837 sch->flags |= TCQ_F_OFFLOADED;
838
839 return err;
840}
841EXPORT_SYMBOL(qdisc_offload_dump_helper);
842
bfaee911
JK
843void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch,
844 struct Qdisc *new, struct Qdisc *old,
845 enum tc_setup_type type, void *type_data,
846 struct netlink_ext_ack *extack)
847{
848 bool any_qdisc_is_offloaded;
849 int err;
850
851 if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
852 return;
853
854 err = dev->netdev_ops->ndo_setup_tc(dev, type, type_data);
855
856 /* Don't report error if the graft is part of destroy operation. */
857 if (!err || !new || new == &noop_qdisc)
858 return;
859
860 /* Don't report error if the parent, the old child and the new
861 * one are not offloaded.
862 */
863 any_qdisc_is_offloaded = new->flags & TCQ_F_OFFLOADED;
864 any_qdisc_is_offloaded |= sch && sch->flags & TCQ_F_OFFLOADED;
865 any_qdisc_is_offloaded |= old && old->flags & TCQ_F_OFFLOADED;
866
867 if (any_qdisc_is_offloaded)
868 NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
869}
870EXPORT_SYMBOL(qdisc_offload_graft_helper);
871
aac4daa8
VO
872void qdisc_offload_query_caps(struct net_device *dev,
873 enum tc_setup_type type,
874 void *caps, size_t caps_len)
875{
876 const struct net_device_ops *ops = dev->netdev_ops;
877 struct tc_query_caps_base base = {
878 .type = type,
879 .caps = caps,
880 };
881
882 memset(caps, 0, caps_len);
883
884 if (ops->ndo_setup_tc)
885 ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base);
886}
887EXPORT_SYMBOL(qdisc_offload_query_caps);
888
98b0e5f6
JK
889static void qdisc_offload_graft_root(struct net_device *dev,
890 struct Qdisc *new, struct Qdisc *old,
891 struct netlink_ext_ack *extack)
892{
893 struct tc_root_qopt_offload graft_offload = {
894 .command = TC_ROOT_GRAFT,
895 .handle = new ? new->handle : 0,
896 .ingress = (new && new->flags & TCQ_F_INGRESS) ||
897 (old && old->flags & TCQ_F_INGRESS),
898 };
899
900 qdisc_offload_graft_helper(dev, NULL, new, old,
901 TC_SETUP_ROOT_QDISC, &graft_offload, extack);
902}
903
27d7f07c
WC
904static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
905 u32 portid, u32 seq, u16 flags, int event)
906{
50dc9a85 907 struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
27d7f07c
WC
908 struct gnet_stats_queue __percpu *cpu_qstats = NULL;
909 struct tcmsg *tcm;
910 struct nlmsghdr *nlh;
911 unsigned char *b = skb_tail_pointer(skb);
912 struct gnet_dump d;
913 struct qdisc_size_table *stab;
d47a6b0e 914 u32 block_index;
27d7f07c
WC
915 __u32 qlen;
916
917 cond_resched();
918 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
919 if (!nlh)
920 goto out_nlmsg_trim;
921 tcm = nlmsg_data(nlh);
922 tcm->tcm_family = AF_UNSPEC;
923 tcm->tcm__pad1 = 0;
924 tcm->tcm__pad2 = 0;
925 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
926 tcm->tcm_parent = clid;
927 tcm->tcm_handle = q->handle;
928 tcm->tcm_info = refcount_read(&q->refcnt);
929 if (nla_put_string(skb, TCA_KIND, q->ops->id))
930 goto nla_put_failure;
d47a6b0e
JP
931 if (q->ops->ingress_block_get) {
932 block_index = q->ops->ingress_block_get(q);
933 if (block_index &&
934 nla_put_u32(skb, TCA_INGRESS_BLOCK, block_index))
935 goto nla_put_failure;
936 }
937 if (q->ops->egress_block_get) {
938 block_index = q->ops->egress_block_get(q);
939 if (block_index &&
940 nla_put_u32(skb, TCA_EGRESS_BLOCK, block_index))
941 goto nla_put_failure;
942 }
27d7f07c
WC
943 if (q->ops->dump && q->ops->dump(q, skb) < 0)
944 goto nla_put_failure;
44edf2f8
NF
945 if (nla_put_u8(skb, TCA_HW_OFFLOAD, !!(q->flags & TCQ_F_OFFLOADED)))
946 goto nla_put_failure;
7e66016f 947 qlen = qdisc_qlen_sum(q);
27d7f07c
WC
948
949 stab = rtnl_dereference(q->stab);
950 if (stab && qdisc_dump_stab(skb, stab) < 0)
951 goto nla_put_failure;
952
953 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
954 NULL, &d, TCA_PAD) < 0)
955 goto nla_put_failure;
956
957 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
958 goto nla_put_failure;
959
960 if (qdisc_is_percpu_stats(q)) {
961 cpu_bstats = q->cpu_bstats;
962 cpu_qstats = q->cpu_qstats;
963 }
964
29cbcd85 965 if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
27d7f07c
WC
966 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
967 gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
968 goto nla_put_failure;
969
970 if (gnet_stats_finish_copy(&d) < 0)
971 goto nla_put_failure;
972
973 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
974 return skb->len;
975
976out_nlmsg_trim:
977nla_put_failure:
978 nlmsg_trim(skb, b);
979 return -1;
980}
981
982static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
983{
984 if (q->flags & TCQ_F_BUILTIN)
985 return true;
986 if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible)
987 return true;
988
989 return false;
990}
991
992static int qdisc_notify(struct net *net, struct sk_buff *oskb,
993 struct nlmsghdr *n, u32 clid,
994 struct Qdisc *old, struct Qdisc *new)
995{
996 struct sk_buff *skb;
997 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
998
999 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1000 if (!skb)
1001 return -ENOBUFS;
1002
1003 if (old && !tc_qdisc_dump_ignore(old, false)) {
1004 if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
1005 0, RTM_DELQDISC) < 0)
1006 goto err_out;
1007 }
1008 if (new && !tc_qdisc_dump_ignore(new, false)) {
1009 if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
1010 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1011 goto err_out;
1012 }
1013
1014 if (skb->len)
1015 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1016 n->nlmsg_flags & NLM_F_ECHO);
1017
1018err_out:
1019 kfree_skb(skb);
1020 return -EINVAL;
1021}
1022
7316ae88
TG
1023static void notify_and_destroy(struct net *net, struct sk_buff *skb,
1024 struct nlmsghdr *n, u32 clid,
99194cff
DM
1025 struct Qdisc *old, struct Qdisc *new)
1026{
1027 if (new || old)
7316ae88 1028 qdisc_notify(net, skb, n, clid, old, new);
1da177e4 1029
4d8863a2 1030 if (old)
86bd446b 1031 qdisc_put(old);
99194cff
DM
1032}
1033
8a53e616
PA
1034static void qdisc_clear_nolock(struct Qdisc *sch)
1035{
1036 sch->flags &= ~TCQ_F_NOLOCK;
1037 if (!(sch->flags & TCQ_F_CPUSTATS))
1038 return;
1039
1040 free_percpu(sch->cpu_bstats);
1041 free_percpu(sch->cpu_qstats);
1042 sch->cpu_bstats = NULL;
1043 sch->cpu_qstats = NULL;
1044 sch->flags &= ~TCQ_F_CPUSTATS;
1045}
1046
99194cff
DM
1047/* Graft qdisc "new" to class "classid" of qdisc "parent" or
1048 * to device "dev".
1049 *
1050 * When appropriate send a netlink notification using 'skb'
1051 * and "n".
1052 *
1053 * On success, destroy old qdisc.
1da177e4
LT
1054 */
1055
1056static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
99194cff 1057 struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
09215598
AA
1058 struct Qdisc *new, struct Qdisc *old,
1059 struct netlink_ext_ack *extack)
1da177e4 1060{
99194cff 1061 struct Qdisc *q = old;
7316ae88 1062 struct net *net = dev_net(dev);
1da177e4 1063
10297b99 1064 if (parent == NULL) {
99194cff
DM
1065 unsigned int i, num_q, ingress;
1066
1067 ingress = 0;
1068 num_q = dev->num_tx_queues;
8d50b53d
DM
1069 if ((q && q->flags & TCQ_F_INGRESS) ||
1070 (new && new->flags & TCQ_F_INGRESS)) {
99194cff
DM
1071 num_q = 1;
1072 ingress = 1;
09215598
AA
1073 if (!dev_ingress_queue(dev)) {
1074 NL_SET_ERR_MSG(extack, "Device does not have an ingress queue");
24824a09 1075 return -ENOENT;
09215598 1076 }
99194cff
DM
1077 }
1078
1079 if (dev->flags & IFF_UP)
1080 dev_deactivate(dev);
1081
98b0e5f6
JK
1082 qdisc_offload_graft_root(dev, new, old, extack);
1083
de2d807b 1084 if (new && new->ops->attach && !ingress)
86e363dc 1085 goto skip;
6ec1c69a 1086
99194cff 1087 for (i = 0; i < num_q; i++) {
24824a09 1088 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
99194cff
DM
1089
1090 if (!ingress)
1091 dev_queue = netdev_get_tx_queue(dev, i);
1092
8d50b53d
DM
1093 old = dev_graft_qdisc(dev_queue, new);
1094 if (new && i > 0)
551143d8 1095 qdisc_refcount_inc(new);
8d50b53d 1096
036d6a67 1097 if (!ingress)
86bd446b 1098 qdisc_put(old);
1da177e4 1099 }
99194cff 1100
86e363dc 1101skip:
036d6a67 1102 if (!ingress) {
ebda44da 1103 old = rtnl_dereference(dev->qdisc);
036d6a67 1104 if (new && !new->ops->attach)
551143d8 1105 qdisc_refcount_inc(new);
5891cd5e 1106 rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
86e363dc 1107
ebda44da
ED
1108 notify_and_destroy(net, skb, n, classid, old, new);
1109
86e363dc
WC
1110 if (new && new->ops->attach)
1111 new->ops->attach(new);
036d6a67 1112 } else {
7316ae88 1113 notify_and_destroy(net, skb, n, classid, old, new);
036d6a67 1114 }
af356afa 1115
99194cff
DM
1116 if (dev->flags & IFF_UP)
1117 dev_activate(dev);
1da177e4 1118 } else {
20fea08b 1119 const struct Qdisc_class_ops *cops = parent->ops->cl_ops;
9da93ece
JK
1120 unsigned long cl;
1121 int err;
1da177e4 1122
c5ad119f 1123 /* Only support running class lockless if parent is lockless */
8eaf8d99 1124 if (new && (new->flags & TCQ_F_NOLOCK) && !(parent->flags & TCQ_F_NOLOCK))
8a53e616 1125 qdisc_clear_nolock(new);
c5ad119f 1126
9da93ece
JK
1127 if (!cops || !cops->graft)
1128 return -EOPNOTSUPP;
143976ce 1129
9da93ece
JK
1130 cl = cops->find(parent, classid);
1131 if (!cl) {
1132 NL_SET_ERR_MSG(extack, "Specified class not found");
1133 return -ENOENT;
1da177e4 1134 }
9da93ece
JK
1135
1136 err = cops->graft(parent, cl, new, &old, extack);
1137 if (err)
1138 return err;
1139 notify_and_destroy(net, skb, n, classid, old, new);
1da177e4 1140 }
9da93ece 1141 return 0;
1da177e4
LT
1142}
1143
d47a6b0e
JP
1144static int qdisc_block_indexes_set(struct Qdisc *sch, struct nlattr **tca,
1145 struct netlink_ext_ack *extack)
1146{
1147 u32 block_index;
1148
1149 if (tca[TCA_INGRESS_BLOCK]) {
1150 block_index = nla_get_u32(tca[TCA_INGRESS_BLOCK]);
1151
1152 if (!block_index) {
1153 NL_SET_ERR_MSG(extack, "Ingress block index cannot be 0");
1154 return -EINVAL;
1155 }
1156 if (!sch->ops->ingress_block_set) {
1157 NL_SET_ERR_MSG(extack, "Ingress block sharing is not supported");
1158 return -EOPNOTSUPP;
1159 }
1160 sch->ops->ingress_block_set(sch, block_index);
1161 }
1162 if (tca[TCA_EGRESS_BLOCK]) {
1163 block_index = nla_get_u32(tca[TCA_EGRESS_BLOCK]);
1164
1165 if (!block_index) {
1166 NL_SET_ERR_MSG(extack, "Egress block index cannot be 0");
1167 return -EINVAL;
1168 }
1169 if (!sch->ops->egress_block_set) {
1170 NL_SET_ERR_MSG(extack, "Egress block sharing is not supported");
1171 return -EOPNOTSUPP;
1172 }
1173 sch->ops->egress_block_set(sch, block_index);
1174 }
1175 return 0;
1176}
1177
1da177e4
LT
1178/*
1179 Allocate and initialize new qdisc.
1180
1181 Parameters are passed via opt.
1182 */
1183
5a7a5555
JHS
1184static struct Qdisc *qdisc_create(struct net_device *dev,
1185 struct netdev_queue *dev_queue,
cfc111d5 1186 u32 parent, u32 handle,
09215598
AA
1187 struct nlattr **tca, int *errp,
1188 struct netlink_ext_ack *extack)
1da177e4
LT
1189{
1190 int err;
1e90474c 1191 struct nlattr *kind = tca[TCA_KIND];
1da177e4
LT
1192 struct Qdisc *sch;
1193 struct Qdisc_ops *ops;
175f9c1b 1194 struct qdisc_size_table *stab;
1da177e4
LT
1195
1196 ops = qdisc_lookup_ops(kind);
95a5afca 1197#ifdef CONFIG_MODULES
1da177e4
LT
1198 if (ops == NULL && kind != NULL) {
1199 char name[IFNAMSIZ];
872f6903 1200 if (nla_strscpy(name, kind, IFNAMSIZ) >= 0) {
1da177e4
LT
1201 /* We dropped the RTNL semaphore in order to
1202 * perform the module load. So, even if we
1203 * succeeded in loading the module we have to
1204 * tell the caller to replay the request. We
1205 * indicate this using -EAGAIN.
1206 * We replay the request because the device may
1207 * go away in the mean time.
1208 */
1209 rtnl_unlock();
1210 request_module("sch_%s", name);
1211 rtnl_lock();
1212 ops = qdisc_lookup_ops(kind);
1213 if (ops != NULL) {
1214 /* We will try again qdisc_lookup_ops,
1215 * so don't keep a reference.
1216 */
1217 module_put(ops->owner);
1218 err = -EAGAIN;
1219 goto err_out;
1220 }
1221 }
1222 }
1223#endif
1224
b9e2cc0f 1225 err = -ENOENT;
09215598 1226 if (!ops) {
973bf8fd 1227 NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown");
1da177e4 1228 goto err_out;
09215598 1229 }
1da177e4 1230
d0bd684d 1231 sch = qdisc_alloc(dev_queue, ops, extack);
3d54b82f
TG
1232 if (IS_ERR(sch)) {
1233 err = PTR_ERR(sch);
1da177e4 1234 goto err_out2;
3d54b82f 1235 }
1da177e4 1236
ffc8fefa
PM
1237 sch->parent = parent;
1238
3d54b82f 1239 if (handle == TC_H_INGRESS) {
1da177e4 1240 sch->flags |= TCQ_F_INGRESS;
3d54b82f 1241 handle = TC_H_MAKE(TC_H_INGRESS, 0);
fd44de7c 1242 } else {
fd44de7c
PM
1243 if (handle == 0) {
1244 handle = qdisc_alloc_handle(dev);
aaeb1dea
IV
1245 if (handle == 0) {
1246 NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
1247 err = -ENOSPC;
fd44de7c 1248 goto err_out3;
aaeb1dea 1249 }
fd44de7c 1250 }
1abbe139 1251 if (!netif_is_multiqueue(dev))
225734de 1252 sch->flags |= TCQ_F_ONETXQUEUE;
1da177e4
LT
1253 }
1254
3d54b82f 1255 sch->handle = handle;
1da177e4 1256
84c46dd8
JDB
1257 /* This exist to keep backward compatible with a userspace
1258 * loophole, what allowed userspace to get IFF_NO_QUEUE
1259 * facility on older kernels by setting tx_queue_len=0 (prior
1260 * to qdisc init), and then forgot to reinit tx_queue_len
1261 * before again attaching a qdisc.
1262 */
1263 if ((dev->priv_flags & IFF_NO_QUEUE) && (dev->tx_queue_len == 0)) {
1264 dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
1265 netdev_info(dev, "Caught tx_queue_len zero misconfig\n");
1266 }
1267
d47a6b0e
JP
1268 err = qdisc_block_indexes_set(sch, tca, extack);
1269 if (err)
1270 goto err_out3;
1271
54160ef6 1272 if (ops->init) {
e63d7dfd 1273 err = ops->init(sch, tca[TCA_OPTIONS], extack);
54160ef6
AA
1274 if (err != 0)
1275 goto err_out5;
1276 }
22e0f8b9 1277
54160ef6 1278 if (tca[TCA_STAB]) {
09215598 1279 stab = qdisc_get_stab(tca[TCA_STAB], extack);
54160ef6
AA
1280 if (IS_ERR(stab)) {
1281 err = PTR_ERR(stab);
1282 goto err_out4;
023e09a7 1283 }
54160ef6
AA
1284 rcu_assign_pointer(sch->stab, stab);
1285 }
1286 if (tca[TCA_RATE]) {
54160ef6 1287 err = -EOPNOTSUPP;
09215598
AA
1288 if (sch->flags & TCQ_F_MQROOT) {
1289 NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
54160ef6 1290 goto err_out4;
09215598 1291 }
1da177e4 1292
54160ef6
AA
1293 err = gen_new_estimator(&sch->bstats,
1294 sch->cpu_bstats,
1295 &sch->rate_est,
1296 NULL,
29cbcd85 1297 true,
54160ef6 1298 tca[TCA_RATE]);
09215598
AA
1299 if (err) {
1300 NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
54160ef6 1301 goto err_out4;
09215598 1302 }
1da177e4 1303 }
54160ef6
AA
1304
1305 qdisc_hash_add(sch, false);
f5a7833e 1306 trace_qdisc_create(ops, dev, parent);
54160ef6
AA
1307
1308 return sch;
1309
1310err_out5:
87b60cfa 1311 /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
c1a4872e
GF
1312 if (ops->destroy)
1313 ops->destroy(sch);
1da177e4 1314err_out3:
d62607c3 1315 netdev_put(dev, &sch->dev_tracker);
81d947e2 1316 qdisc_free(sch);
1da177e4
LT
1317err_out2:
1318 module_put(ops->owner);
1319err_out:
1320 *errp = err;
1da177e4 1321 return NULL;
23bcf634
PM
1322
1323err_out4:
1324 /*
1325 * Any broken qdiscs that would require a ops->reset() here?
1326 * The qdisc was never in action so it shouldn't be necessary.
1327 */
a2da570d 1328 qdisc_put_stab(rtnl_dereference(sch->stab));
23bcf634
PM
1329 if (ops->destroy)
1330 ops->destroy(sch);
1331 goto err_out3;
1da177e4
LT
1332}
1333
09215598
AA
1334static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
1335 struct netlink_ext_ack *extack)
1da177e4 1336{
a2da570d 1337 struct qdisc_size_table *ostab, *stab = NULL;
175f9c1b 1338 int err = 0;
1da177e4 1339
175f9c1b 1340 if (tca[TCA_OPTIONS]) {
09215598
AA
1341 if (!sch->ops->change) {
1342 NL_SET_ERR_MSG(extack, "Change operation not supported by specified qdisc");
1da177e4 1343 return -EINVAL;
09215598 1344 }
d47a6b0e
JP
1345 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
1346 NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
1347 return -EOPNOTSUPP;
1348 }
2030721c 1349 err = sch->ops->change(sch, tca[TCA_OPTIONS], extack);
1da177e4
LT
1350 if (err)
1351 return err;
1352 }
175f9c1b
JK
1353
1354 if (tca[TCA_STAB]) {
09215598 1355 stab = qdisc_get_stab(tca[TCA_STAB], extack);
175f9c1b
JK
1356 if (IS_ERR(stab))
1357 return PTR_ERR(stab);
1358 }
1359
a2da570d
ED
1360 ostab = rtnl_dereference(sch->stab);
1361 rcu_assign_pointer(sch->stab, stab);
1362 qdisc_put_stab(ostab);
175f9c1b 1363
23bcf634 1364 if (tca[TCA_RATE]) {
71bcb09a
SH
1365 /* NB: ignores errors from replace_estimator
1366 because change can't be undone. */
23bcf634
PM
1367 if (sch->flags & TCQ_F_MQROOT)
1368 goto out;
22e0f8b9
JF
1369 gen_replace_estimator(&sch->bstats,
1370 sch->cpu_bstats,
1371 &sch->rate_est,
edb09eb1 1372 NULL,
29cbcd85 1373 true,
22e0f8b9 1374 tca[TCA_RATE]);
23bcf634
PM
1375 }
1376out:
1da177e4
LT
1377 return 0;
1378}
1379
cc7ec456
ED
1380struct check_loop_arg {
1381 struct qdisc_walker w;
1da177e4
LT
1382 struct Qdisc *p;
1383 int depth;
1384};
1385
5a7a5555
JHS
1386static int check_loop_fn(struct Qdisc *q, unsigned long cl,
1387 struct qdisc_walker *w);
1da177e4
LT
1388
1389static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1390{
1391 struct check_loop_arg arg;
1392
1393 if (q->ops->cl_ops == NULL)
1394 return 0;
1395
1396 arg.w.stop = arg.w.skip = arg.w.count = 0;
1397 arg.w.fn = check_loop_fn;
1398 arg.depth = depth;
1399 arg.p = p;
1400 q->ops->cl_ops->walk(q, &arg.w);
1401 return arg.w.stop ? -ELOOP : 0;
1402}
1403
1404static int
1405check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1406{
1407 struct Qdisc *leaf;
20fea08b 1408 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1da177e4
LT
1409 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1410
1411 leaf = cops->leaf(q, cl);
1412 if (leaf) {
1413 if (leaf == arg->p || arg->depth > 7)
1414 return -ELOOP;
1415 return check_loop(leaf, arg->p, arg->depth + 1);
1416 }
1417 return 0;
1418}
1419
8b4c3cdd 1420const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
6f96c3c6 1421 [TCA_KIND] = { .type = NLA_STRING },
8b4c3cdd
DA
1422 [TCA_RATE] = { .type = NLA_BINARY,
1423 .len = sizeof(struct tc_estimator) },
1424 [TCA_STAB] = { .type = NLA_NESTED },
1425 [TCA_DUMP_INVISIBLE] = { .type = NLA_FLAG },
1426 [TCA_CHAIN] = { .type = NLA_U32 },
1427 [TCA_INGRESS_BLOCK] = { .type = NLA_U32 },
1428 [TCA_EGRESS_BLOCK] = { .type = NLA_U32 },
1429};
1430
e331473f
DC
1431/*
1432 * Delete/get qdisc.
1433 */
1434
c21ef3e3
DA
1435static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1436 struct netlink_ext_ack *extack)
1da177e4 1437{
3b1e0a65 1438 struct net *net = sock_net(skb->sk);
02ef22ca 1439 struct tcmsg *tcm = nlmsg_data(n);
1e90474c 1440 struct nlattr *tca[TCA_MAX + 1];
1da177e4 1441 struct net_device *dev;
de179c8c 1442 u32 clid;
1da177e4
LT
1443 struct Qdisc *q = NULL;
1444 struct Qdisc *p = NULL;
1445 int err;
1446
8cb08174
JB
1447 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1448 rtm_tca_policy, extack);
1e90474c
PM
1449 if (err < 0)
1450 return err;
1451
de179c8c
H
1452 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1453 if (!dev)
1454 return -ENODEV;
1455
1456 clid = tcm->tcm_parent;
1da177e4
LT
1457 if (clid) {
1458 if (clid != TC_H_ROOT) {
1459 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
cc7ec456 1460 p = qdisc_lookup(dev, TC_H_MAJ(clid));
09215598
AA
1461 if (!p) {
1462 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified classid");
1da177e4 1463 return -ENOENT;
09215598 1464 }
1da177e4 1465 q = qdisc_leaf(p, clid);
cc7ec456
ED
1466 } else if (dev_ingress_queue(dev)) {
1467 q = dev_ingress_queue(dev)->qdisc_sleeping;
10297b99 1468 }
1da177e4 1469 } else {
5891cd5e 1470 q = rtnl_dereference(dev->qdisc);
1da177e4 1471 }
09215598
AA
1472 if (!q) {
1473 NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device");
1da177e4 1474 return -ENOENT;
09215598 1475 }
1da177e4 1476
09215598
AA
1477 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) {
1478 NL_SET_ERR_MSG(extack, "Invalid handle");
1da177e4 1479 return -EINVAL;
09215598 1480 }
1da177e4 1481 } else {
cc7ec456 1482 q = qdisc_lookup(dev, tcm->tcm_handle);
09215598
AA
1483 if (!q) {
1484 NL_SET_ERR_MSG(extack, "Failed to find qdisc with specified handle");
1da177e4 1485 return -ENOENT;
09215598 1486 }
1da177e4
LT
1487 }
1488
09215598
AA
1489 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1490 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1da177e4 1491 return -EINVAL;
09215598 1492 }
1da177e4
LT
1493
1494 if (n->nlmsg_type == RTM_DELQDISC) {
09215598
AA
1495 if (!clid) {
1496 NL_SET_ERR_MSG(extack, "Classid cannot be zero");
1da177e4 1497 return -EINVAL;
09215598
AA
1498 }
1499 if (q->handle == 0) {
1500 NL_SET_ERR_MSG(extack, "Cannot delete qdisc with handle of zero");
1da177e4 1501 return -ENOENT;
09215598
AA
1502 }
1503 err = qdisc_graft(dev, p, skb, n, clid, NULL, q, extack);
cc7ec456 1504 if (err != 0)
1da177e4 1505 return err;
1da177e4 1506 } else {
7316ae88 1507 qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4
LT
1508 }
1509 return 0;
1510}
1511
1512/*
cc7ec456 1513 * Create/change qdisc.
1da177e4
LT
1514 */
1515
c21ef3e3
DA
1516static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
1517 struct netlink_ext_ack *extack)
1da177e4 1518{
3b1e0a65 1519 struct net *net = sock_net(skb->sk);
1da177e4 1520 struct tcmsg *tcm;
1e90474c 1521 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1522 struct net_device *dev;
1523 u32 clid;
1524 struct Qdisc *q, *p;
1525 int err;
1526
1527replay:
1528 /* Reinit, just in case something touches this. */
8cb08174
JB
1529 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
1530 rtm_tca_policy, extack);
de179c8c
H
1531 if (err < 0)
1532 return err;
1533
02ef22ca 1534 tcm = nlmsg_data(n);
1da177e4
LT
1535 clid = tcm->tcm_parent;
1536 q = p = NULL;
1537
cc7ec456
ED
1538 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1539 if (!dev)
1da177e4
LT
1540 return -ENODEV;
1541
1e90474c 1542
1da177e4
LT
1543 if (clid) {
1544 if (clid != TC_H_ROOT) {
1545 if (clid != TC_H_INGRESS) {
cc7ec456 1546 p = qdisc_lookup(dev, TC_H_MAJ(clid));
09215598
AA
1547 if (!p) {
1548 NL_SET_ERR_MSG(extack, "Failed to find specified qdisc");
1da177e4 1549 return -ENOENT;
09215598 1550 }
1da177e4 1551 q = qdisc_leaf(p, clid);
cc7ec456
ED
1552 } else if (dev_ingress_queue_create(dev)) {
1553 q = dev_ingress_queue(dev)->qdisc_sleeping;
1da177e4
LT
1554 }
1555 } else {
5891cd5e 1556 q = rtnl_dereference(dev->qdisc);
1da177e4
LT
1557 }
1558
1559 /* It may be default qdisc, ignore it */
1560 if (q && q->handle == 0)
1561 q = NULL;
1562
1563 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1564 if (tcm->tcm_handle) {
09215598
AA
1565 if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) {
1566 NL_SET_ERR_MSG(extack, "NLM_F_REPLACE needed to override");
1da177e4 1567 return -EEXIST;
09215598
AA
1568 }
1569 if (TC_H_MIN(tcm->tcm_handle)) {
1570 NL_SET_ERR_MSG(extack, "Invalid minor handle");
1da177e4 1571 return -EINVAL;
09215598 1572 }
cc7ec456 1573 q = qdisc_lookup(dev, tcm->tcm_handle);
8ec69574 1574 if (!q)
1da177e4 1575 goto create_n_graft;
09215598
AA
1576 if (n->nlmsg_flags & NLM_F_EXCL) {
1577 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot override");
1da177e4 1578 return -EEXIST;
09215598 1579 }
0ac4bd68 1580 if (tca[TCA_KIND] &&
09215598
AA
1581 nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1582 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1da177e4 1583 return -EINVAL;
09215598 1584 }
1da177e4 1585 if (q == p ||
09215598
AA
1586 (p && check_loop(q, p, 0))) {
1587 NL_SET_ERR_MSG(extack, "Qdisc parent/child loop detected");
1da177e4 1588 return -ELOOP;
09215598 1589 }
551143d8 1590 qdisc_refcount_inc(q);
1da177e4
LT
1591 goto graft;
1592 } else {
cc7ec456 1593 if (!q)
1da177e4
LT
1594 goto create_n_graft;
1595
1596 /* This magic test requires explanation.
1597 *
1598 * We know, that some child q is already
1599 * attached to this parent and have choice:
1600 * either to change it or to create/graft new one.
1601 *
1602 * 1. We are allowed to create/graft only
1603 * if CREATE and REPLACE flags are set.
1604 *
1605 * 2. If EXCL is set, requestor wanted to say,
1606 * that qdisc tcm_handle is not expected
1607 * to exist, so that we choose create/graft too.
1608 *
1609 * 3. The last case is when no flags are set.
1610 * Alas, it is sort of hole in API, we
1611 * cannot decide what to do unambiguously.
1612 * For now we select create/graft, if
1613 * user gave KIND, which does not match existing.
1614 */
cc7ec456
ED
1615 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1616 (n->nlmsg_flags & NLM_F_REPLACE) &&
1617 ((n->nlmsg_flags & NLM_F_EXCL) ||
1e90474c
PM
1618 (tca[TCA_KIND] &&
1619 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1da177e4
LT
1620 goto create_n_graft;
1621 }
1622 }
1623 } else {
09215598
AA
1624 if (!tcm->tcm_handle) {
1625 NL_SET_ERR_MSG(extack, "Handle cannot be zero");
1da177e4 1626 return -EINVAL;
09215598 1627 }
1da177e4
LT
1628 q = qdisc_lookup(dev, tcm->tcm_handle);
1629 }
1630
1631 /* Change qdisc parameters */
09215598
AA
1632 if (!q) {
1633 NL_SET_ERR_MSG(extack, "Specified qdisc not found");
1da177e4 1634 return -ENOENT;
09215598
AA
1635 }
1636 if (n->nlmsg_flags & NLM_F_EXCL) {
1637 NL_SET_ERR_MSG(extack, "Exclusivity flag on, cannot modify");
1da177e4 1638 return -EEXIST;
09215598
AA
1639 }
1640 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) {
1641 NL_SET_ERR_MSG(extack, "Invalid qdisc name");
1da177e4 1642 return -EINVAL;
09215598
AA
1643 }
1644 err = qdisc_change(q, tca, extack);
1da177e4 1645 if (err == 0)
7316ae88 1646 qdisc_notify(net, skb, n, clid, NULL, q);
1da177e4
LT
1647 return err;
1648
1649create_n_graft:
09215598
AA
1650 if (!(n->nlmsg_flags & NLM_F_CREATE)) {
1651 NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
1da177e4 1652 return -ENOENT;
09215598 1653 }
24824a09 1654 if (clid == TC_H_INGRESS) {
09215598 1655 if (dev_ingress_queue(dev)) {
cfc111d5 1656 q = qdisc_create(dev, dev_ingress_queue(dev),
24824a09 1657 tcm->tcm_parent, tcm->tcm_parent,
09215598
AA
1658 tca, &err, extack);
1659 } else {
1660 NL_SET_ERR_MSG(extack, "Cannot find ingress queue for specified device");
24824a09 1661 err = -ENOENT;
09215598 1662 }
24824a09 1663 } else {
926e61b7 1664 struct netdev_queue *dev_queue;
6ec1c69a
DM
1665
1666 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
926e61b7
JP
1667 dev_queue = p->ops->cl_ops->select_queue(p, tcm);
1668 else if (p)
1669 dev_queue = p->dev_queue;
1670 else
1671 dev_queue = netdev_get_tx_queue(dev, 0);
6ec1c69a 1672
cfc111d5 1673 q = qdisc_create(dev, dev_queue,
bb949fbd 1674 tcm->tcm_parent, tcm->tcm_handle,
09215598 1675 tca, &err, extack);
6ec1c69a 1676 }
1da177e4
LT
1677 if (q == NULL) {
1678 if (err == -EAGAIN)
1679 goto replay;
1680 return err;
1681 }
1682
1683graft:
09215598 1684 err = qdisc_graft(dev, p, skb, n, clid, q, NULL, extack);
e5befbd9
IJ
1685 if (err) {
1686 if (q)
86bd446b 1687 qdisc_put(q);
e5befbd9 1688 return err;
1da177e4 1689 }
e5befbd9 1690
1da177e4
LT
1691 return 0;
1692}
1693
30723673
DM
1694static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1695 struct netlink_callback *cb,
49b49971
JK
1696 int *q_idx_p, int s_q_idx, bool recur,
1697 bool dump_invisible)
30723673
DM
1698{
1699 int ret = 0, q_idx = *q_idx_p;
1700 struct Qdisc *q;
59cc1f61 1701 int b;
30723673
DM
1702
1703 if (!root)
1704 return 0;
1705
1706 q = root;
1707 if (q_idx < s_q_idx) {
1708 q_idx++;
1709 } else {
49b49971 1710 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
15e47304 1711 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
5a7a5555
JHS
1712 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1713 RTM_NEWQDISC) <= 0)
30723673
DM
1714 goto done;
1715 q_idx++;
1716 }
69012ae4 1717
ea327469
JK
1718 /* If dumping singletons, there is no qdisc_dev(root) and the singleton
1719 * itself has already been dumped.
1720 *
1721 * If we've already dumped the top-level (ingress) qdisc above and the global
1722 * qdisc hashtable, we don't want to hit it again
1723 */
1724 if (!qdisc_dev(root) || !recur)
69012ae4
JK
1725 goto out;
1726
59cc1f61 1727 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
30723673
DM
1728 if (q_idx < s_q_idx) {
1729 q_idx++;
1730 continue;
1731 }
49b49971 1732 if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
15e47304 1733 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
5a7a5555
JHS
1734 cb->nlh->nlmsg_seq, NLM_F_MULTI,
1735 RTM_NEWQDISC) <= 0)
30723673
DM
1736 goto done;
1737 q_idx++;
1738 }
1739
1740out:
1741 *q_idx_p = q_idx;
1742 return ret;
1743done:
1744 ret = -1;
1745 goto out;
1746}
1747
1da177e4
LT
1748static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1749{
3b1e0a65 1750 struct net *net = sock_net(skb->sk);
1da177e4
LT
1751 int idx, q_idx;
1752 int s_idx, s_q_idx;
1753 struct net_device *dev;
49b49971 1754 const struct nlmsghdr *nlh = cb->nlh;
49b49971
JK
1755 struct nlattr *tca[TCA_MAX + 1];
1756 int err;
1da177e4
LT
1757
1758 s_idx = cb->args[0];
1759 s_q_idx = q_idx = cb->args[1];
f1e9016d 1760
7562f876 1761 idx = 0;
15dc36eb 1762 ASSERT_RTNL();
49b49971 1763
8cb08174
JB
1764 err = nlmsg_parse_deprecated(nlh, sizeof(struct tcmsg), tca, TCA_MAX,
1765 rtm_tca_policy, cb->extack);
49b49971
JK
1766 if (err < 0)
1767 return err;
1768
15dc36eb 1769 for_each_netdev(net, dev) {
30723673
DM
1770 struct netdev_queue *dev_queue;
1771
1da177e4 1772 if (idx < s_idx)
7562f876 1773 goto cont;
1da177e4
LT
1774 if (idx > s_idx)
1775 s_q_idx = 0;
1da177e4 1776 q_idx = 0;
30723673 1777
5891cd5e
ED
1778 if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc),
1779 skb, cb, &q_idx, s_q_idx,
49b49971 1780 true, tca[TCA_DUMP_INVISIBLE]) < 0)
30723673
DM
1781 goto done;
1782
24824a09
ED
1783 dev_queue = dev_ingress_queue(dev);
1784 if (dev_queue &&
1785 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
49b49971
JK
1786 &q_idx, s_q_idx, false,
1787 tca[TCA_DUMP_INVISIBLE]) < 0)
30723673
DM
1788 goto done;
1789
7562f876
PE
1790cont:
1791 idx++;
1da177e4
LT
1792 }
1793
1794done:
1da177e4
LT
1795 cb->args[0] = idx;
1796 cb->args[1] = q_idx;
1797
1798 return skb->len;
1799}
1800
1801
1802
1803/************************************************
1804 * Traffic classes manipulation. *
1805 ************************************************/
1806
27d7f07c
WC
1807static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1808 unsigned long cl,
1809 u32 portid, u32 seq, u16 flags, int event)
1810{
1811 struct tcmsg *tcm;
1812 struct nlmsghdr *nlh;
1813 unsigned char *b = skb_tail_pointer(skb);
1814 struct gnet_dump d;
1815 const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1da177e4 1816
27d7f07c
WC
1817 cond_resched();
1818 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1819 if (!nlh)
1820 goto out_nlmsg_trim;
1821 tcm = nlmsg_data(nlh);
1822 tcm->tcm_family = AF_UNSPEC;
1823 tcm->tcm__pad1 = 0;
1824 tcm->tcm__pad2 = 0;
1825 tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1826 tcm->tcm_parent = q->handle;
1827 tcm->tcm_handle = q->handle;
1828 tcm->tcm_info = 0;
1829 if (nla_put_string(skb, TCA_KIND, q->ops->id))
1830 goto nla_put_failure;
1831 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1832 goto nla_put_failure;
1833
1834 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
1835 NULL, &d, TCA_PAD) < 0)
1836 goto nla_put_failure;
1837
1838 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1839 goto nla_put_failure;
1840
1841 if (gnet_stats_finish_copy(&d) < 0)
1842 goto nla_put_failure;
1843
1844 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1845 return skb->len;
1846
1847out_nlmsg_trim:
1848nla_put_failure:
1849 nlmsg_trim(skb, b);
1850 return -1;
1851}
1852
1853static int tclass_notify(struct net *net, struct sk_buff *oskb,
1854 struct nlmsghdr *n, struct Qdisc *q,
1855 unsigned long cl, int event)
1856{
1857 struct sk_buff *skb;
1858 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1859
1860 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1861 if (!skb)
1862 return -ENOBUFS;
1863
1864 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
1865 kfree_skb(skb);
1866 return -EINVAL;
1867 }
1868
f79a3bcb
YD
1869 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1870 n->nlmsg_flags & NLM_F_ECHO);
27d7f07c 1871}
1da177e4 1872
14546ba1
WC
1873static int tclass_del_notify(struct net *net,
1874 const struct Qdisc_class_ops *cops,
1875 struct sk_buff *oskb, struct nlmsghdr *n,
4dd78a73
MM
1876 struct Qdisc *q, unsigned long cl,
1877 struct netlink_ext_ack *extack)
14546ba1
WC
1878{
1879 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1880 struct sk_buff *skb;
1881 int err = 0;
1882
1883 if (!cops->delete)
1884 return -EOPNOTSUPP;
1885
1886 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1887 if (!skb)
1888 return -ENOBUFS;
1889
1890 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
1891 RTM_DELTCLASS) < 0) {
1892 kfree_skb(skb);
1893 return -EINVAL;
1894 }
1895
4dd78a73 1896 err = cops->delete(q, cl, extack);
14546ba1
WC
1897 if (err) {
1898 kfree_skb(skb);
1899 return err;
1900 }
1901
5b5f99b1
ZW
1902 err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1903 n->nlmsg_flags & NLM_F_ECHO);
5b5f99b1 1904 return err;
14546ba1
WC
1905}
1906
07d79fc7
CW
1907#ifdef CONFIG_NET_CLS
1908
1909struct tcf_bind_args {
1910 struct tcf_walker w;
2e24cd75 1911 unsigned long base;
07d79fc7 1912 unsigned long cl;
2e24cd75 1913 u32 classid;
07d79fc7
CW
1914};
1915
1916static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
1917{
1918 struct tcf_bind_args *a = (void *)arg;
1919
4e6263ec 1920 if (n && tp->ops->bind_class) {
74e3be60
JP
1921 struct Qdisc *q = tcf_block_q(tp->chain->block);
1922
1923 sch_tree_lock(q);
2e24cd75 1924 tp->ops->bind_class(n, a->classid, a->cl, q, a->base);
74e3be60 1925 sch_tree_unlock(q);
07d79fc7
CW
1926 }
1927 return 0;
1928}
1929
760d228e
CW
1930struct tc_bind_class_args {
1931 struct qdisc_walker w;
1932 unsigned long new_cl;
1933 u32 portid;
1934 u32 clid;
1935};
1936
1937static int tc_bind_class_walker(struct Qdisc *q, unsigned long cl,
1938 struct qdisc_walker *w)
07d79fc7 1939{
760d228e 1940 struct tc_bind_class_args *a = (struct tc_bind_class_args *)w;
07d79fc7
CW
1941 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1942 struct tcf_block *block;
1943 struct tcf_chain *chain;
07d79fc7 1944
cbaacc4e 1945 block = cops->tcf_block(q, cl, NULL);
07d79fc7 1946 if (!block)
760d228e 1947 return 0;
bbf73830
VB
1948 for (chain = tcf_get_next_chain(block, NULL);
1949 chain;
1950 chain = tcf_get_next_chain(block, chain)) {
07d79fc7
CW
1951 struct tcf_proto *tp;
1952
0fca55ed
VB
1953 for (tp = tcf_get_next_proto(chain, NULL);
1954 tp; tp = tcf_get_next_proto(chain, tp)) {
07d79fc7
CW
1955 struct tcf_bind_args arg = {};
1956
1957 arg.w.fn = tcf_node_bind;
760d228e 1958 arg.classid = a->clid;
2e24cd75 1959 arg.base = cl;
760d228e 1960 arg.cl = a->new_cl;
12db03b6 1961 tp->ops->walk(tp, &arg.w, true);
07d79fc7
CW
1962 }
1963 }
760d228e
CW
1964
1965 return 0;
1966}
1967
1968static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1969 unsigned long new_cl)
1970{
1971 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1972 struct tc_bind_class_args args = {};
1973
1974 if (!cops->tcf_block)
1975 return;
1976 args.portid = portid;
1977 args.clid = clid;
1978 args.new_cl = new_cl;
1979 args.w.fn = tc_bind_class_walker;
1980 q->ops->cl_ops->walk(q, &args.w);
07d79fc7
CW
1981}
1982
1983#else
1984
1985static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
1986 unsigned long new_cl)
1987{
1988}
1989
1990#endif
1991
c21ef3e3
DA
1992static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
1993 struct netlink_ext_ack *extack)
1da177e4 1994{
3b1e0a65 1995 struct net *net = sock_net(skb->sk);
02ef22ca 1996 struct tcmsg *tcm = nlmsg_data(n);
1e90474c 1997 struct nlattr *tca[TCA_MAX + 1];
1da177e4
LT
1998 struct net_device *dev;
1999 struct Qdisc *q = NULL;
20fea08b 2000 const struct Qdisc_class_ops *cops;
1da177e4
LT
2001 unsigned long cl = 0;
2002 unsigned long new_cl;
de179c8c
H
2003 u32 portid;
2004 u32 clid;
2005 u32 qid;
1da177e4
LT
2006 int err;
2007
8cb08174
JB
2008 err = nlmsg_parse_deprecated(n, sizeof(*tcm), tca, TCA_MAX,
2009 rtm_tca_policy, extack);
1e90474c
PM
2010 if (err < 0)
2011 return err;
2012
de179c8c
H
2013 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2014 if (!dev)
2015 return -ENODEV;
2016
1da177e4
LT
2017 /*
2018 parent == TC_H_UNSPEC - unspecified parent.
2019 parent == TC_H_ROOT - class is root, which has no parent.
2020 parent == X:0 - parent is root class.
2021 parent == X:Y - parent is a node in hierarchy.
2022 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
2023
2024 handle == 0:0 - generate handle from kernel pool.
2025 handle == 0:Y - class is X:Y, where X:0 is qdisc.
2026 handle == X:Y - clear.
2027 handle == X:0 - root class.
2028 */
2029
2030 /* Step 1. Determine qdisc handle X:0 */
2031
de179c8c
H
2032 portid = tcm->tcm_parent;
2033 clid = tcm->tcm_handle;
2034 qid = TC_H_MAJ(clid);
2035
15e47304
EB
2036 if (portid != TC_H_ROOT) {
2037 u32 qid1 = TC_H_MAJ(portid);
1da177e4
LT
2038
2039 if (qid && qid1) {
2040 /* If both majors are known, they must be identical. */
2041 if (qid != qid1)
2042 return -EINVAL;
2043 } else if (qid1) {
2044 qid = qid1;
2045 } else if (qid == 0)
5891cd5e 2046 qid = rtnl_dereference(dev->qdisc)->handle;
1da177e4
LT
2047
2048 /* Now qid is genuine qdisc handle consistent
cc7ec456
ED
2049 * both with parent and child.
2050 *
15e47304 2051 * TC_H_MAJ(portid) still may be unspecified, complete it now.
1da177e4 2052 */
15e47304
EB
2053 if (portid)
2054 portid = TC_H_MAKE(qid, portid);
1da177e4
LT
2055 } else {
2056 if (qid == 0)
5891cd5e 2057 qid = rtnl_dereference(dev->qdisc)->handle;
1da177e4
LT
2058 }
2059
2060 /* OK. Locate qdisc */
cc7ec456
ED
2061 q = qdisc_lookup(dev, qid);
2062 if (!q)
1da177e4
LT
2063 return -ENOENT;
2064
2065 /* An check that it supports classes */
2066 cops = q->ops->cl_ops;
2067 if (cops == NULL)
2068 return -EINVAL;
2069
2070 /* Now try to get class */
2071 if (clid == 0) {
15e47304 2072 if (portid == TC_H_ROOT)
1da177e4
LT
2073 clid = qid;
2074 } else
2075 clid = TC_H_MAKE(qid, clid);
2076
2077 if (clid)
143976ce 2078 cl = cops->find(q, clid);
1da177e4
LT
2079
2080 if (cl == 0) {
2081 err = -ENOENT;
cc7ec456
ED
2082 if (n->nlmsg_type != RTM_NEWTCLASS ||
2083 !(n->nlmsg_flags & NLM_F_CREATE))
1da177e4
LT
2084 goto out;
2085 } else {
2086 switch (n->nlmsg_type) {
10297b99 2087 case RTM_NEWTCLASS:
1da177e4 2088 err = -EEXIST;
cc7ec456 2089 if (n->nlmsg_flags & NLM_F_EXCL)
1da177e4
LT
2090 goto out;
2091 break;
2092 case RTM_DELTCLASS:
4dd78a73 2093 err = tclass_del_notify(net, cops, skb, n, q, cl, extack);
07d79fc7
CW
2094 /* Unbind the class from its filters by rebinding them to 0 */
2095 tc_bind_tclass(q, portid, clid, 0);
1da177e4
LT
2096 goto out;
2097 case RTM_GETTCLASS:
7316ae88 2098 err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
1da177e4
LT
2099 goto out;
2100 default:
2101 err = -EINVAL;
2102 goto out;
2103 }
2104 }
2105
d47a6b0e
JP
2106 if (tca[TCA_INGRESS_BLOCK] || tca[TCA_EGRESS_BLOCK]) {
2107 NL_SET_ERR_MSG(extack, "Shared blocks are not supported for classes");
2108 return -EOPNOTSUPP;
2109 }
2110
1da177e4 2111 new_cl = cl;
de6d5cdf
PM
2112 err = -EOPNOTSUPP;
2113 if (cops->change)
793d81d6 2114 err = cops->change(q, clid, portid, tca, &new_cl, extack);
07d79fc7 2115 if (err == 0) {
7316ae88 2116 tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
07d79fc7
CW
2117 /* We just create a new class, need to do reverse binding. */
2118 if (cl != new_cl)
2119 tc_bind_tclass(q, portid, clid, new_cl);
2120 }
1da177e4 2121out:
1da177e4
LT
2122 return err;
2123}
2124
cc7ec456
ED
2125struct qdisc_dump_args {
2126 struct qdisc_walker w;
2127 struct sk_buff *skb;
2128 struct netlink_callback *cb;
1da177e4
LT
2129};
2130
5a7a5555
JHS
2131static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
2132 struct qdisc_walker *arg)
1da177e4
LT
2133{
2134 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
2135
15e47304 2136 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
5a7a5555
JHS
2137 a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2138 RTM_NEWTCLASS);
1da177e4
LT
2139}
2140
30723673
DM
2141static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
2142 struct tcmsg *tcm, struct netlink_callback *cb,
2143 int *t_p, int s_t)
2144{
2145 struct qdisc_dump_args arg;
2146
49b49971 2147 if (tc_qdisc_dump_ignore(q, false) ||
30723673
DM
2148 *t_p < s_t || !q->ops->cl_ops ||
2149 (tcm->tcm_parent &&
2150 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
2151 (*t_p)++;
2152 return 0;
2153 }
2154 if (*t_p > s_t)
2155 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
2156 arg.w.fn = qdisc_class_dump;
2157 arg.skb = skb;
2158 arg.cb = cb;
2159 arg.w.stop = 0;
2160 arg.w.skip = cb->args[1];
2161 arg.w.count = 0;
2162 q->ops->cl_ops->walk(q, &arg.w);
2163 cb->args[1] = arg.w.count;
2164 if (arg.w.stop)
2165 return -1;
2166 (*t_p)++;
2167 return 0;
2168}
2169
2170static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
2171 struct tcmsg *tcm, struct netlink_callback *cb,
bfc25605 2172 int *t_p, int s_t, bool recur)
30723673
DM
2173{
2174 struct Qdisc *q;
59cc1f61 2175 int b;
30723673
DM
2176
2177 if (!root)
2178 return 0;
2179
2180 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
2181 return -1;
2182
bfc25605 2183 if (!qdisc_dev(root) || !recur)
69012ae4
JK
2184 return 0;
2185
cb395b20
ED
2186 if (tcm->tcm_parent) {
2187 q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent));
3c53ed8f
PS
2188 if (q && q != root &&
2189 tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
cb395b20
ED
2190 return -1;
2191 return 0;
2192 }
59cc1f61 2193 hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) {
30723673
DM
2194 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
2195 return -1;
2196 }
2197
2198 return 0;
2199}
2200
1da177e4
LT
2201static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
2202{
02ef22ca 2203 struct tcmsg *tcm = nlmsg_data(cb->nlh);
3b1e0a65 2204 struct net *net = sock_net(skb->sk);
30723673 2205 struct netdev_queue *dev_queue;
1da177e4 2206 struct net_device *dev;
30723673 2207 int t, s_t;
1da177e4 2208
573ce260 2209 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
1da177e4 2210 return 0;
cc7ec456
ED
2211 dev = dev_get_by_index(net, tcm->tcm_ifindex);
2212 if (!dev)
1da177e4
LT
2213 return 0;
2214
2215 s_t = cb->args[0];
2216 t = 0;
2217
5891cd5e
ED
2218 if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc),
2219 skb, tcm, cb, &t, s_t, true) < 0)
30723673
DM
2220 goto done;
2221
24824a09
ED
2222 dev_queue = dev_ingress_queue(dev);
2223 if (dev_queue &&
2224 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
bfc25605 2225 &t, s_t, false) < 0)
30723673 2226 goto done;
1da177e4 2227
30723673 2228done:
1da177e4
LT
2229 cb->args[0] = t;
2230
2231 dev_put(dev);
2232 return skb->len;
2233}
2234
1da177e4
LT
2235#ifdef CONFIG_PROC_FS
2236static int psched_show(struct seq_file *seq, void *v)
2237{
2238 seq_printf(seq, "%08x %08x %08x %08x\n",
ca44d6e6 2239 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
514bca32 2240 1000000,
1e317688 2241 (u32)NSEC_PER_SEC / hrtimer_resolution);
1da177e4
LT
2242
2243 return 0;
2244}
2245
7316ae88
TG
2246static int __net_init psched_net_init(struct net *net)
2247{
2248 struct proc_dir_entry *e;
2249
3f3942ac 2250 e = proc_create_single("psched", 0, net->proc_net, psched_show);
7316ae88
TG
2251 if (e == NULL)
2252 return -ENOMEM;
2253
2254 return 0;
2255}
2256
2257static void __net_exit psched_net_exit(struct net *net)
2258{
ece31ffd 2259 remove_proc_entry("psched", net->proc_net);
7316ae88
TG
2260}
2261#else
2262static int __net_init psched_net_init(struct net *net)
2263{
2264 return 0;
2265}
2266
2267static void __net_exit psched_net_exit(struct net *net)
2268{
2269}
1da177e4
LT
2270#endif
2271
7316ae88
TG
2272static struct pernet_operations psched_net_ops = {
2273 .init = psched_net_init,
2274 .exit = psched_net_exit,
2275};
2276
7f0e8102
PT
/*
 * Static key, initially false.
 * NOTE(review): presumably consumed by the helpers in <net/tc_wrapper.h> and
 * set up by tc_wrapper_init() - confirm against that header.
 */
DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper);
2278
1da177e4
LT
2279static int __init pktsched_init(void)
2280{
7316ae88
TG
2281 int err;
2282
2283 err = register_pernet_subsys(&psched_net_ops);
2284 if (err) {
cc7ec456 2285 pr_err("pktsched_init: "
7316ae88
TG
2286 "cannot initialize per netns operations\n");
2287 return err;
2288 }
2289
6da7c8fc 2290 register_qdisc(&pfifo_fast_ops);
1da177e4
LT
2291 register_qdisc(&pfifo_qdisc_ops);
2292 register_qdisc(&bfifo_qdisc_ops);
57dbb2d8 2293 register_qdisc(&pfifo_head_drop_qdisc_ops);
6ec1c69a 2294 register_qdisc(&mq_qdisc_ops);
d66d6c31 2295 register_qdisc(&noqueue_qdisc_ops);
1da177e4 2296
b97bac64
FW
2297 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0);
2298 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0);
5a7a5555 2299 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc,
b97bac64
FW
2300 0);
2301 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0);
2302 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0);
5a7a5555 2303 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass,
b97bac64 2304 0);
be577ddc 2305
7f0e8102
PT
2306 tc_wrapper_init();
2307
1da177e4
LT
2308 return 0;
2309}
2310
2311subsys_initcall(pktsched_init);