net: sch: api: add extack support in tcf_block_get
[linux-block.git] / net / sched / sch_generic.c
/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *		- Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <linux/skb_array.h>
#include <linux/if_macvlan.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <trace/events/qdisc.h>

/* Qdisc to use by default */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
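/* For reference (these helpers live in include/net/sch_generic.h and are
 * restated here only as an illustrative note, not as code from this file):
 * qdisc_lock(q) returns &q->q.lock, and the "qdisc root lock" referred to
 * above is qdisc_lock() taken on the root qdisc attached to the tx queue.
 */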

static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
{
	const struct netdev_queue *txq = q->dev_queue;
	spinlock_t *lock = NULL;
	struct sk_buff *skb;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	skb = skb_peek(&q->skb_bad_txq);
	if (skb) {
		/* check the reason of requeuing without tx lock first */
		txq = skb_get_tx_queue(txq->dev, skb);
		if (!netif_xmit_frozen_or_stopped(txq)) {
			skb = __skb_dequeue(&q->skb_bad_txq);
			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_dec(q, skb);
				qdisc_qstats_cpu_qlen_dec(q);
			} else {
				qdisc_qstats_backlog_dec(q, skb);
				q->q.qlen--;
			}
		} else {
			skb = NULL;
		}
	}

	if (lock)
		spin_unlock(lock);

	return skb;
}

static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
{
	struct sk_buff *skb = skb_peek(&q->skb_bad_txq);

	if (unlikely(skb))
		skb = __skb_dequeue_bad_txq(q);

	return skb;
}

static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
					     struct sk_buff *skb)
{
	spinlock_t *lock = NULL;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	__skb_queue_tail(&q->skb_bad_txq, skb);

	if (lock)
		spin_unlock(lock);
}

static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	__skb_queue_head(&q->gso_skb, skb);
	q->qstats.requeues++;
	qdisc_qstats_backlog_inc(q, skb);
	q->q.qlen++;	/* it's still part of the queue */
	__netif_schedule(q);

	return 0;
}

static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
{
	spinlock_t *lock = qdisc_lock(q);

	spin_lock(lock);
	__skb_queue_tail(&q->gso_skb, skb);
	spin_unlock(lock);

	qdisc_qstats_cpu_requeues_inc(q);
	qdisc_qstats_cpu_backlog_inc(q, skb);
	qdisc_qstats_cpu_qlen_inc(q);
	__netif_schedule(q);

	return 0;
}

static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	if (q->flags & TCQ_F_NOLOCK)
		return dev_requeue_skb_locked(skb, q);
	else
		return __dev_requeue_skb(skb, q);
}

static void try_bulk_dequeue_skb(struct Qdisc *q,
				 struct sk_buff *skb,
				 const struct netdev_queue *txq,
				 int *packets)
{
	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;

	while (bytelimit > 0) {
		struct sk_buff *nskb = q->dequeue(q);

		if (!nskb)
			break;

		bytelimit -= nskb->len; /* covers GSO len */
		skb->next = nskb;
		skb = nskb;
		(*packets)++; /* GSO counts as one pkt */
	}
	skb->next = NULL;
}

/* This variant of try_bulk_dequeue_skb() makes sure
 * all skbs in the chain are for the same txq
 */
static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
				      struct sk_buff *skb,
				      int *packets)
{
	int mapping = skb_get_queue_mapping(skb);
	struct sk_buff *nskb;
	int cnt = 0;

	do {
		nskb = q->dequeue(q);
		if (!nskb)
			break;
		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
			qdisc_enqueue_skb_bad_txq(q, nskb);

			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_inc(q, nskb);
				qdisc_qstats_cpu_qlen_inc(q);
			} else {
				qdisc_qstats_backlog_inc(q, nskb);
				q->q.qlen++;
			}
			break;
		}
		skb->next = nskb;
		skb = nskb;
	} while (++cnt < 8);
	(*packets) += cnt;
	skb->next = NULL;
}

/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
 * A requeued skb (via q->gso_skb) can also be a SKB list.
 */
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
				   int *packets)
{
	const struct netdev_queue *txq = q->dev_queue;
	struct sk_buff *skb = NULL;

	*packets = 1;
	if (unlikely(!skb_queue_empty(&q->gso_skb))) {
		spinlock_t *lock = NULL;

		if (q->flags & TCQ_F_NOLOCK) {
			lock = qdisc_lock(q);
			spin_lock(lock);
		}

		skb = skb_peek(&q->gso_skb);

		/* skb may be null if another cpu pulls gso_skb off in between
		 * empty check and lock.
		 */
		if (!skb) {
			if (lock)
				spin_unlock(lock);
			goto validate;
		}

		/* skb in gso_skb were already validated */
		*validate = false;
		/* check the reason of requeuing without tx lock first */
		txq = skb_get_tx_queue(txq->dev, skb);
		if (!netif_xmit_frozen_or_stopped(txq)) {
			skb = __skb_dequeue(&q->gso_skb);
			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_dec(q, skb);
				qdisc_qstats_cpu_qlen_dec(q);
			} else {
				qdisc_qstats_backlog_dec(q, skb);
				q->q.qlen--;
			}
		} else {
			skb = NULL;
		}
		if (lock)
			spin_unlock(lock);
		goto trace;
	}
validate:
	*validate = true;

	if ((q->flags & TCQ_F_ONETXQUEUE) &&
	    netif_xmit_frozen_or_stopped(txq))
		return skb;

	skb = qdisc_dequeue_skb_bad_txq(q);
	if (unlikely(skb))
		goto bulk;
	skb = q->dequeue(q);
	if (skb) {
bulk:
		if (qdisc_may_bulk(q))
			try_bulk_dequeue_skb(q, skb, txq, packets);
		else
			try_bulk_dequeue_skb_slow(q, skb, packets);
	}
trace:
	trace_qdisc_dequeue(q, txq, *packets, skb);
	return skb;
}

/*
 * Transmit possibly several skbs, and handle the return status as
 * required. Owning running seqcount bit guarantees that
 * only one CPU can execute this function.
 *
 * Returns to the caller:
 *				false  - hardware queue frozen backoff
 *				true   - feel free to send more pkts
 */
bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
		     struct net_device *dev, struct netdev_queue *txq,
		     spinlock_t *root_lock, bool validate)
{
	int ret = NETDEV_TX_BUSY;

	/* And release qdisc */
	if (root_lock)
		spin_unlock(root_lock);

	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
	if (validate)
		skb = validate_xmit_skb_list(skb, dev);

	if (likely(skb)) {
		HARD_TX_LOCK(dev, txq, smp_processor_id());
		if (!netif_xmit_frozen_or_stopped(txq))
			skb = dev_hard_start_xmit(skb, dev, txq, &ret);

		HARD_TX_UNLOCK(dev, txq);
	} else {
		if (root_lock)
			spin_lock(root_lock);
		return true;
	}

	if (root_lock)
		spin_lock(root_lock);

	if (!dev_xmit_complete(ret)) {
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY))
			net_warn_ratelimited("BUG %s code %d qlen %d\n",
					     dev->name, ret, q->q.qlen);

		dev_requeue_skb(skb, q);
		return false;
	}

	if (ret && netif_xmit_frozen_or_stopped(txq))
		return false;

	return true;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * running seqcount guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 * netif_tx_lock serializes accesses to device driver.
 *
 * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 * if one is grabbed, another must be free.
 *
 * Note, that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
static inline bool qdisc_restart(struct Qdisc *q, int *packets)
{
	spinlock_t *root_lock = NULL;
	struct netdev_queue *txq;
	struct net_device *dev;
	struct sk_buff *skb;
	bool validate;

	/* Dequeue packet */
	skb = dequeue_skb(q, &validate, packets);
	if (unlikely(!skb))
		return false;

	if (!(q->flags & TCQ_F_NOLOCK))
		root_lock = qdisc_lock(q);

	dev = qdisc_dev(q);
	txq = skb_get_tx_queue(dev, skb);

	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
}

void __qdisc_run(struct Qdisc *q)
{
	int quota = dev_tx_weight;
	int packets;

	while (qdisc_restart(q, &packets)) {
		/*
		 * Ordered by possible occurrence: Postpone processing if
		 * 1. we've exceeded packet quota
		 * 2. another process needs the CPU;
		 */
		quota -= packets;
		if (quota <= 0 || need_resched()) {
			__netif_schedule(q);
			break;
		}
	}
}
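/* Illustrative note (an assumption about the surrounding headers, not code
 * from this file): __qdisc_run() is normally entered through the qdisc_run()
 * wrapper, which claims the per-qdisc running seqcount so only one CPU runs
 * the loop above at a time. A minimal sketch of that wrapper:
 *
 *	static inline void qdisc_run(struct Qdisc *q)
 *	{
 *		if (qdisc_run_begin(q)) {
 *			__qdisc_run(q);
 *			qdisc_run_end(q);
 *		}
 *	}
 */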

unsigned long dev_trans_start(struct net_device *dev)
{
	unsigned long val, res;
	unsigned int i;

	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);
	else if (netif_is_macvlan(dev))
		dev = macvlan_dev_real_dev(dev);
	res = netdev_get_tx_queue(dev, 0)->trans_start;
	for (i = 1; i < dev->num_tx_queues; i++) {
		val = netdev_get_tx_queue(dev, i)->trans_start;
		if (val && time_after(val, res))
			res = val;
	}

	return res;
}
EXPORT_SYMBOL(dev_trans_start);

static void dev_watchdog(struct timer_list *t)
{
	struct net_device *dev = from_timer(dev, t, watchdog_timer);

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_timedout = 0;
			unsigned int i;
			unsigned long trans_start;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				trans_start = txq->trans_start;
				if (netif_xmit_stopped(txq) &&
				    time_after(jiffies, (trans_start +
							 dev->watchdog_timeo))) {
					some_queue_timedout = 1;
					txq->trans_timeout++;
					break;
				}
			}

			if (some_queue_timedout) {
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
					  dev->name, netdev_drivername(dev), i);
				dev->netdev_ops->ndo_tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->netdev_ops->ndo_tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected that carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_changes);
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_changes);
		linkwatch_fire_event(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
	.qdisc_sleeping	=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
	.running	=	SEQCNT_ZERO(noop_qdisc.running),
	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
};
EXPORT_SYMBOL(noop_qdisc);

static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	/* register_qdisc() assigns a default of noop_enqueue if unset,
	 * but __dev_queue_xmit() treats noqueue only as such
	 * if this is NULL - so clear it here. */
	qdisc->enqueue = NULL;
	return 0;
}

struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.init		=	noqueue_init,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

static const u8 prio2band[TC_PRIO_MAX + 1] = {
	1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
};

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3
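/* Illustrative example (not part of the original file): with the prio2band
 * table above, skb->priority & TC_PRIO_MAX picks the band, e.g.
 * TC_PRIO_INTERACTIVE (6) maps to band 0, TC_PRIO_BESTEFFORT (0) to band 1
 * and TC_PRIO_BULK (2) to band 2; the dequeue path below always drains
 * lower-numbered bands first.
 */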

/*
 * Private data for a pfifo_fast scheduler containing:
 *	- rings for priority bands
 */
struct pfifo_fast_priv {
	struct skb_array q[PFIFO_FAST_BANDS];
};

static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
					  int band)
{
	return &priv->q[band];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			      struct sk_buff **to_free)
{
	int band = prio2band[skb->priority & TC_PRIO_MAX];
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct skb_array *q = band2list(priv, band);
	int err;

	err = skb_array_produce(q, skb);

	if (unlikely(err))
		return qdisc_drop_cpu(skb, qdisc, to_free);

	qdisc_qstats_cpu_qlen_inc(qdisc);
	qdisc_qstats_cpu_backlog_inc(qdisc, skb);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct sk_buff *skb = NULL;
	int band;

	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
		struct skb_array *q = band2list(priv, band);

		if (__skb_array_empty(q))
			continue;

		skb = skb_array_consume_bh(q);
	}
	if (likely(skb)) {
		qdisc_qstats_cpu_backlog_dec(qdisc, skb);
		qdisc_bstats_cpu_update(qdisc, skb);
		qdisc_qstats_cpu_qlen_dec(qdisc);
	}

	return skb;
}

static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct sk_buff *skb = NULL;
	int band;

	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
		struct skb_array *q = band2list(priv, band);

		skb = __skb_array_peek(q);
	}

	return skb;
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int i, band;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);

	for (band = 0; band < PFIFO_FAST_BANDS; band++) {
		struct skb_array *q = band2list(priv, band);
		struct sk_buff *skb;

		/* NULL ring is possible if destroy path is due to a failed
		 * skb_array_init() in pfifo_fast_init() case.
		 */
		if (!q->ring.queue)
			continue;

		while ((skb = skb_array_consume_bh(q)) != NULL)
			kfree_skb(skb);
	}

	for_each_possible_cpu(i) {
		struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);

		q->backlog = 0;
		q->qlen = 0;
	}
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	int prio;

	/* guard against zero length rings */
	if (!qlen)
		return -EINVAL;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);
		int err;

		err = skb_array_init(q, qlen, GFP_KERNEL);
		if (err)
			return -ENOMEM;
	}

	/* Can by-pass the queue discipline */
	qdisc->flags |= TCQ_F_CAN_BYPASS;
	return 0;
}

static void pfifo_fast_destroy(struct Qdisc *sch)
{
	struct pfifo_fast_priv *priv = qdisc_priv(sch);
	int prio;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);

		/* NULL ring is possible if destroy path is due to a failed
		 * skb_array_init() in pfifo_fast_init() case.
		 */
		if (!q->ring.queue)
			continue;
		/* Destroy ring but no need to kfree_skb because a call to
		 * pfifo_fast_reset() has already done that work.
		 */
		ptr_ring_cleanup(&q->ring, NULL);
	}
}

struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	sizeof(struct pfifo_fast_priv),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.peek		=	pfifo_fast_peek,
	.init		=	pfifo_fast_init,
	.destroy	=	pfifo_fast_destroy,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
	.static_flags	=	TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
};
EXPORT_SYMBOL(pfifo_fast_ops);

static struct lock_class_key qdisc_tx_busylock;
static struct lock_class_key qdisc_running_key;

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  const struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
	int err = -ENOBUFS;
	struct net_device *dev;

	if (!dev_queue) {
		err = -EINVAL;
		goto errout;
	}

	dev = dev_queue->dev;
	p = kzalloc_node(size, GFP_KERNEL,
			 netdev_queue_numa_node_read(dev_queue));

	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	/* if we got non aligned memory, ask more and do alignment ourself */
	if (sch != p) {
		kfree(p);
		p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
				 netdev_queue_numa_node_read(dev_queue));
		if (!p)
			goto errout;
		sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
		sch->padded = (char *) sch - (char *) p;
	}
	__skb_queue_head_init(&sch->gso_skb);
	__skb_queue_head_init(&sch->skb_bad_txq);
	qdisc_skb_head_init(&sch->q);
	spin_lock_init(&sch->q.lock);

	if (ops->static_flags & TCQ_F_CPUSTATS) {
		sch->cpu_bstats =
			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
		if (!sch->cpu_bstats)
			goto errout1;

		sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
		if (!sch->cpu_qstats) {
			free_percpu(sch->cpu_bstats);
			goto errout1;
		}
	}

	spin_lock_init(&sch->busylock);
	lockdep_set_class(&sch->busylock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);

	seqcount_init(&sch->running);
	lockdep_set_class(&sch->running,
			  dev->qdisc_running_key ?: &qdisc_running_key);

	sch->ops = ops;
	sch->flags = ops->static_flags;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(dev);
	refcount_set(&sch->refcnt, 1);

	return sch;
errout1:
	kfree(p);
errout:
	return ERR_PTR(err);
}

struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
				const struct Qdisc_ops *ops,
				unsigned int parentid)
{
	struct Qdisc *sch;

	if (!try_module_get(ops->owner))
		return NULL;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		module_put(ops->owner);
		return NULL;
	}
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);
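/* Usage sketch (illustrative, not code from this file): qdiscs that need a
 * default child queue typically create it with something like
 *
 *	child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
 *				  TC_H_MAKE(sch->handle, 1));
 *
 * and must handle a NULL return, which covers both a missing module and a
 * failed qdisc_alloc()/init().
 */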

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;
	struct sk_buff *skb, *tmp;

	if (ops->reset)
		ops->reset(qdisc);

	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
		__skb_unlink(skb, &qdisc->gso_skb);
		kfree_skb_list(skb);
	}

	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
		__skb_unlink(skb, &qdisc->skb_bad_txq);
		kfree_skb_list(skb);
	}

	qdisc->q.qlen = 0;
	qdisc->qstats.backlog = 0;
}
EXPORT_SYMBOL(qdisc_reset);

static void qdisc_free(struct Qdisc *qdisc)
{
	if (qdisc_is_percpu_stats(qdisc)) {
		free_percpu(qdisc->cpu_bstats);
		free_percpu(qdisc->cpu_qstats);
	}

	kfree((char *) qdisc - qdisc->padded);
}

void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;
	struct sk_buff *skb, *tmp;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !refcount_dec_and_test(&qdisc->refcnt))
		return;

#ifdef CONFIG_NET_SCHED
	qdisc_hash_del(qdisc);

	qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
	gen_kill_estimator(&qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
		__skb_unlink(skb, &qdisc->gso_skb);
		kfree_skb_list(skb);
	}

	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
		__skb_unlink(skb, &qdisc->skb_bad_txq);
		kfree_skb_list(skb);
	}

	qdisc_free(qdisc);
}
EXPORT_SYMBOL(qdisc_destroy);

/* Attach toplevel qdisc to device queue. */
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}
EXPORT_SYMBOL(dev_graft_qdisc);

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;
	const struct Qdisc_ops *ops = default_qdisc_ops;

	if (dev->priv_flags & IFF_NO_QUEUE)
		ops = &noqueue_qdisc_ops;

	qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT);
	if (!qdisc) {
		netdev_info(dev, "activation failed\n");
		return;
	}
	if (!netif_is_multiqueue(dev))
		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	dev_queue->qdisc_sleeping = qdisc;
}

static void attach_default_qdiscs(struct net_device *dev)
{
	struct netdev_queue *txq;
	struct Qdisc *qdisc;

	txq = netdev_get_tx_queue(dev, 0);

	if (!netif_is_multiqueue(dev) ||
	    dev->priv_flags & IFF_NO_QUEUE) {
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
		dev->qdisc = txq->qdisc_sleeping;
		qdisc_refcount_inc(dev->qdisc);
	} else {
		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
		if (qdisc) {
			dev->qdisc = qdisc;
			qdisc->ops->attach(qdisc);
		}
	}
#ifdef CONFIG_NET_SCHED
	if (dev->qdisc != &noop_qdisc)
		qdisc_hash_add(dev->qdisc, false);
#endif
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p) {
		dev_queue->trans_start = 0;
		*need_watchdog_p = 1;
	}
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	 * create default one for devices, which need queueing
	 * and noqueue_qdisc for virtual interfaces
	 */

	if (dev->qdisc == &noop_qdisc)
		attach_default_qdiscs(dev);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	if (dev_ingress_queue(dev))
		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);

	if (need_watchdog) {
		netif_trans_update(dev);
		dev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(dev_activate);

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = rtnl_dereference(dev_queue->qdisc);
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}
}

static bool some_qdisc_is_busy(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc_sleeping;

		if (q->flags & TCQ_F_NOLOCK) {
			val = test_bit(__QDISC_STATE_SCHED, &q->state);
		} else {
			root_lock = qdisc_lock(q);
			spin_lock_bh(root_lock);

			val = (qdisc_is_running(q) ||
			       test_bit(__QDISC_STATE_SCHED, &q->state));

			spin_unlock_bh(root_lock);
		}

		if (val)
			return true;
	}
	return false;
}

static void dev_qdisc_reset(struct net_device *dev,
			    struct netdev_queue *dev_queue,
			    void *none)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;

	if (qdisc)
		qdisc_reset(qdisc);
}

/**
 * dev_deactivate_many - deactivate transmissions on several devices
 * @head: list of devices to deactivate
 *
 * This function returns only when all outstanding transmissions
 * have completed, unless all devices are in dismantle phase.
 */
void dev_deactivate_many(struct list_head *head)
{
	struct net_device *dev;

	list_for_each_entry(dev, head, close_list) {
		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
					 &noop_qdisc);
		if (dev_ingress_queue(dev))
			dev_deactivate_queue(dev, dev_ingress_queue(dev),
					     &noop_qdisc);

		dev_watchdog_down(dev);
	}

	/* Wait for outstanding qdisc-less dev_queue_xmit calls.
	 * This is avoided if all devices are in dismantle phase :
	 * Caller will call synchronize_net() for us
	 */
	synchronize_net();

	/* Wait for outstanding qdisc_run calls. */
	list_for_each_entry(dev, head, close_list) {
		while (some_qdisc_is_busy(dev))
			yield();
		/* The new qdisc is assigned at this point so we can safely
		 * unwind stale skb lists and qdisc statistics
		 */
		netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
		if (dev_ingress_queue(dev))
			dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
	}
}

void dev_deactivate(struct net_device *dev)
{
	LIST_HEAD(single);

	list_add(&dev->close_list, &single);
	dev_deactivate_many(&single);
	list_del(&single);
}
EXPORT_SYMBOL(dev_deactivate);

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	rcu_assign_pointer(dev_queue->qdisc, qdisc);
	dev_queue->qdisc_sleeping = qdisc;
	__skb_queue_head_init(&qdisc->gso_skb);
	__skb_queue_head_init(&qdisc->skb_bad_txq);
}

void dev_init_scheduler(struct net_device *dev)
{
	dev->qdisc = &noop_qdisc;
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);

	timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
	qdisc_destroy(dev->qdisc);
	dev->qdisc = &noop_qdisc;

	WARN_ON(timer_pending(&dev->watchdog_timer));
}

void psched_ratecfg_precompute(struct psched_ratecfg *r,
			       const struct tc_ratespec *conf,
			       u64 rate64)
{
	memset(r, 0, sizeof(*r));
	r->overhead = conf->overhead;
	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
	r->mult = 1;
	/*
	 * The deal here is to replace a divide by a reciprocal one
	 * in fast path (a reciprocal divide is a multiply and a shift)
	 *
	 * Normal formula would be :
	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
	 *
	 * We compute mult/shift to use instead :
	 *  time_in_ns = (len * mult) >> shift;
	 *
	 * We try to get the highest possible mult value for accuracy,
	 * but have to make sure no overflows will ever happen.
	 */
	if (r->rate_bytes_ps > 0) {
		u64 factor = NSEC_PER_SEC;

		for (;;) {
			r->mult = div64_u64(factor, r->rate_bytes_ps);
			if (r->mult & (1U << 31) || factor & (1ULL << 63))
				break;
			factor <<= 1;
			r->shift++;
		}
	}
}
EXPORT_SYMBOL(psched_ratecfg_precompute);
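/* Worked example (illustrative, not part of the original file): consumers
 * such as psched_l2t_ns() in the headers convert a packet length into a
 * transmit time with the precomputed pair, roughly
 *
 *	time_in_ns = ((u64)(len + r->overhead) * r->mult) >> r->shift;
 *
 * which approximates len * NSEC_PER_SEC / r->rate_bytes_ps without a 64-bit
 * divide in the fast path.
 */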

static void mini_qdisc_rcu_func(struct rcu_head *head)
{
}

void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
			  struct tcf_proto *tp_head)
{
	struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
	struct mini_Qdisc *miniq;

	if (!tp_head) {
		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
		return;
	}

	miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
		&miniqp->miniq1 : &miniqp->miniq2;

	/* We need to make sure that readers won't see the miniq
	 * we are about to modify. So wait until previous call_rcu_bh callback
	 * is done.
	 */
	rcu_barrier_bh();
	miniq->filter_list = tp_head;
	rcu_assign_pointer(*miniqp->p_miniq, miniq);

	if (miniq_old)
		/* This is counterpart of the rcu barrier above. We need to
		 * block potential new user of miniq_old until all readers
		 * are not seeing it.
		 */
		call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func);
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);

void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
			  struct mini_Qdisc __rcu **p_miniq)
{
	miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
	miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
	miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
	miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
	miniqp->p_miniq = p_miniq;
}
EXPORT_SYMBOL(mini_qdisc_pair_init);
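/* Usage sketch (an assumption based on the clsact/ingress users of this API,
 * not code from this file): a classifier-only qdisc embeds a mini_Qdisc_pair
 * in its private data, wires it up once at init time and swaps the active
 * mini qdisc whenever its filter chain head changes:
 *
 *	mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress);
 *	...
 *	mini_qdisc_pair_swap(&q->miniqp, tp_head);  // chain_head_change callback
 */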