[NET_SCHED]: act_api: qdisc internal reclassify support
1/*
2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/string.h>
17#include <linux/errno.h>
18#include <linux/skbuff.h>
19#include <net/netlink.h>
20#include <net/pkt_sched.h>
21
22
23/* Class-Based Queueing (CBQ) algorithm.
24 =======================================
25
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
31
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
33 Parameters", 1996
34
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
37
38 -----------------------------------------------------------------------
39
40 Algorithm skeleton was taken from NS simulator cbq.cc.
41 If someone wants to check this code against the LBL version,
42 he should take into account that ONLY the skeleton was borrowed,
43 the implementation is different. Particularly:
44
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than MTU, which is always the case when real time classes
48 have small rates. Note that the statement of [3] is
49 incomplete: the delay may actually be estimated even if the class
50 per-round allotment is less than MTU. Namely, if per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1
52
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
54
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
57
58
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places, which look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me, why I am wrong 8).
63
64 --- Linux has no EOI event, so that we cannot estimate true class
65 idle time. Workaround is to consider the next dequeue event
66 as sign that previous packet is finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with clock integrator, this scheme looks
69 very close to an ideal solution. */
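/* Added illustrative note (not part of the original file): plugging
   example numbers into the delay bound above, and assuming [x] denotes
   rounding up. Take MTU = 1500 bytes, a per-round allotment
   W*r_i = 500 bytes, W*r = 2000 bytes, k = 3 classes and a link of
   B = 12.5 Mbyte/s (100 Mbit/s):

       delay_i <= (ceil(1500/500)*2000 + 2000 + 3*1500)/B
                = (6000 + 2000 + 4500)/12500000 s
                = 1 ms

   i.e. even with quanta below the MTU the worst-case delay stays
   bounded, which is the point the comment above is making. */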
70
71struct cbq_sched_data;
72
73
74struct cbq_class
75{
76 struct cbq_class *next; /* hash table link */
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
78
79/* Parameters */
80 u32 classid;
81 unsigned char priority; /* class priority */
82 unsigned char priority2; /* priority to be used after overlimit */
83 unsigned char ewma_log; /* time constant for idle time calculation */
84 unsigned char ovl_strategy;
85#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
86 unsigned char police;
87#endif
88
89 u32 defmap;
90
91 /* Link-sharing scheduler parameters */
92 long maxidle; /* Class parameters: see below. */
93 long offtime;
94 long minidle;
95 u32 avpkt;
96 struct qdisc_rate_table *R_tab;
97
98 /* Overlimit strategy parameters */
99 void (*overlimit)(struct cbq_class *cl);
100 psched_tdiff_t penalty;
101
102 /* General scheduler (WRR) parameters */
103 long allot;
104 long quantum; /* Allotment per WRR round */
105 long weight; /* Relative allotment: see below */
106
107 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
108 struct cbq_class *split; /* Ptr to split node */
109 struct cbq_class *share; /* Ptr to LS parent in the class tree */
110 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
111 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
112 parent otherwise */
113 struct cbq_class *sibling; /* Sibling chain */
114 struct cbq_class *children; /* Pointer to children chain */
115
116 struct Qdisc *q; /* Elementary queueing discipline */
117
118
119/* Variables */
120 unsigned char cpriority; /* Effective priority */
121 unsigned char delayed;
122 unsigned char level; /* level of the class in hierarchy:
123 0 for leaf classes, and maximal
124 level of children + 1 for nodes.
125 */
126
127 psched_time_t last; /* Last end of service */
128 psched_time_t undertime;
129 long avgidle;
130 long deficit; /* Saved deficit for WRR */
131 psched_time_t penalized;
132 struct gnet_stats_basic bstats;
133 struct gnet_stats_queue qstats;
134 struct gnet_stats_rate_est rate_est;
135 struct tc_cbq_xstats xstats;
136
137 struct tcf_proto *filter_list;
138
139 int refcnt;
140 int filters;
141
142 struct cbq_class *defaults[TC_PRIO_MAX+1];
143};
144
145struct cbq_sched_data
146{
147 struct cbq_class *classes[16]; /* Hash table of all classes */
148 int nclasses[TC_CBQ_MAXPRIO+1];
149 unsigned quanta[TC_CBQ_MAXPRIO+1];
150
151 struct cbq_class link;
152
153 unsigned activemask;
154 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
155 with backlog */
156
157#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
158 struct cbq_class *rx_class;
159#endif
160 struct cbq_class *tx_class;
161 struct cbq_class *tx_borrowed;
162 int tx_len;
163 psched_time_t now; /* Cached timestamp */
164 psched_time_t now_rt; /* Cached real time */
165 unsigned pmask;
166
167 struct hrtimer delay_timer;
168 struct qdisc_watchdog watchdog; /* Watchdog timer,
169 started when CBQ has
170 backlog, but cannot
171 transmit just now */
172 psched_tdiff_t wd_expires;
173 int toplevel;
174 u32 hgenerator;
175};
176
177
178#define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
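/* Added note (not in the original): L2T ("length to time") indexes the
   class's precomputed rate table. R_tab->data[] holds, per packet-length
   cell, the time needed to transmit that many bytes at the class rate,
   and cell_log sets the cell width; e.g. with cell_log == 3 a 1500-byte
   packet maps to cell 1500 >> 3 == 187. The table itself is supplied by
   userspace tc and attached via qdisc_get_rtab(). */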
179
180
181static __inline__ unsigned cbq_hash(u32 h)
182{
183 h ^= h>>8;
184 h ^= h>>4;
185 return h&0xF;
186}
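/* Added worked example (not in the original): cbq_hash() folds the
   32-bit classid into one of 16 buckets. For classid 0x00010005:
       0x00010005 ^ (0x00010005 >> 8) = 0x00010105
       0x00010105 ^ (0x00010105 >> 4) = 0x00011115
       0x00011115 & 0xF               = 5
   so class 1:5 lands in bucket 5 of q->classes[]. */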
187
188static __inline__ struct cbq_class *
189cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
190{
191 struct cbq_class *cl;
192
193 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
194 if (cl->classid == classid)
195 return cl;
196 return NULL;
197}
198
199#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
200
201static struct cbq_class *
202cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
203{
204 struct cbq_class *cl, *new;
205
206 for (cl = this->tparent; cl; cl = cl->tparent)
207 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
208 return new;
209
210 return NULL;
211}
212
213#endif
214
215/* Classify packet. The procedure is pretty complicated, but
216 it allows us to combine link sharing and priority scheduling
217 transparently.
218
219 Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
220 so that it resolves to split nodes. Then packets are classified
221 by logical priority, or a more specific classifier may be attached
222 to the split node.
223 */
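/* Added sketch of the steps below (hypothetical handles, not from the
   original): assume this qdisc is 8001: (sch->handle == 0x80010000).
   Step 1: a packet with skb->priority == 0x80010003 has the same major
   number, so class 8001:3 is used directly if it exists.
   Steps 2+n: otherwise the filter chain of the current head class is
   consulted; if a filter returns a classid without a major part, it is
   treated as a priority index into the split node's defaults[] map,
   falling back to the TC_PRIO_BESTEFFORT entry. The walk repeats until
   a leaf (level 0) class is reached. */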
224
225static struct cbq_class *
226cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
227{
228 struct cbq_sched_data *q = qdisc_priv(sch);
229 struct cbq_class *head = &q->link;
230 struct cbq_class **defmap;
231 struct cbq_class *cl = NULL;
232 u32 prio = skb->priority;
233 struct tcf_result res;
234
235 /*
236 * Step 1. If skb->priority points to one of our classes, use it.
237 */
238 if (TC_H_MAJ(prio^sch->handle) == 0 &&
239 (cl = cbq_class_lookup(q, prio)) != NULL)
240 return cl;
241
242 *qerr = NET_XMIT_BYPASS;
243 for (;;) {
244 int result = 0;
245 defmap = head->defaults;
246
247 /*
248 * Step 2+n. Apply classifier.
249 */
250 if (!head->filter_list ||
251 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
252 goto fallback;
253
254 if ((cl = (void*)res.class) == NULL) {
255 if (TC_H_MAJ(res.classid))
256 cl = cbq_class_lookup(q, res.classid);
257 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
258 cl = defmap[TC_PRIO_BESTEFFORT];
259
260 if (cl == NULL || cl->level >= head->level)
261 goto fallback;
262 }
263
264#ifdef CONFIG_NET_CLS_ACT
265 switch (result) {
266 case TC_ACT_QUEUED:
267 case TC_ACT_STOLEN:
268 *qerr = NET_XMIT_SUCCESS;
269 case TC_ACT_SHOT:
270 return NULL;
271 case TC_ACT_RECLASSIFY:
272 return cbq_reclassify(skb, cl);
273 }
274#elif defined(CONFIG_NET_CLS_POLICE)
275 switch (result) {
276 case TC_POLICE_RECLASSIFY:
277 return cbq_reclassify(skb, cl);
278 case TC_POLICE_SHOT:
279 return NULL;
280 default:
281 break;
282 }
283#endif
284 if (cl->level == 0)
285 return cl;
286
287 /*
288 * Step 3+n. If classifier selected a link sharing class,
289 * apply agency specific classifier.
290 * Repeat this procedure until we hit a leaf node.
291 */
292 head = cl;
293 }
294
295fallback:
296 cl = head;
297
298 /*
299 * Step 4. No success...
300 */
301 if (TC_H_MAJ(prio) == 0 &&
302 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
303 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
304 return head;
305
306 return cl;
307}
308
309/*
310 A packet has just been enqueued on the empty class.
311 cbq_activate_class adds it to the tail of active class list
312 of its priority band.
313 */
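/* Added note: q->active[prio] points at the *tail* of a circular,
   singly linked ring threaded through next_alive, so appending is O(1):
   the new class is linked after the old tail (inheriting its next_alive,
   i.e. the ring head) and then becomes the new tail. */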
314
315static __inline__ void cbq_activate_class(struct cbq_class *cl)
316{
317 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
318 int prio = cl->cpriority;
319 struct cbq_class *cl_tail;
320
321 cl_tail = q->active[prio];
322 q->active[prio] = cl;
323
324 if (cl_tail != NULL) {
325 cl->next_alive = cl_tail->next_alive;
326 cl_tail->next_alive = cl;
327 } else {
328 cl->next_alive = cl;
329 q->activemask |= (1<<prio);
330 }
331}
332
333/*
334 Unlink class from active chain.
335 Note that this same procedure is done directly in cbq_dequeue*
336 during round-robin procedure.
337 */
338
339static void cbq_deactivate_class(struct cbq_class *this)
340{
341 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
342 int prio = this->cpriority;
343 struct cbq_class *cl;
344 struct cbq_class *cl_prev = q->active[prio];
345
346 do {
347 cl = cl_prev->next_alive;
348 if (cl == this) {
349 cl_prev->next_alive = cl->next_alive;
350 cl->next_alive = NULL;
351
352 if (cl == q->active[prio]) {
353 q->active[prio] = cl_prev;
354 if (cl == q->active[prio]) {
355 q->active[prio] = NULL;
356 q->activemask &= ~(1<<prio);
357 return;
358 }
359 }
360 return;
361 }
362 } while ((cl_prev = cl) != q->active[prio]);
363}
364
365static void
366cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
367{
368 int toplevel = q->toplevel;
369
370 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
371 psched_time_t now;
372 psched_tdiff_t incr;
373
374 now = psched_get_time();
375 incr = now - q->now_rt;
376 now = q->now + incr;
377
378 do {
379 if (cl->undertime < now) {
380 q->toplevel = cl->level;
381 return;
382 }
383 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
384 }
385}
386
387static int
388cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
389{
390 struct cbq_sched_data *q = qdisc_priv(sch);
391 int len = skb->len;
392 int ret;
393 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
394
395#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
396 q->rx_class = cl;
397#endif
398 if (cl == NULL) {
399 if (ret == NET_XMIT_BYPASS)
400 sch->qstats.drops++;
401 kfree_skb(skb);
402 return ret;
403 }
404
405#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
406 cl->q->__parent = sch;
407#endif
408 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
409 sch->q.qlen++;
410 sch->bstats.packets++;
411 sch->bstats.bytes+=len;
412 cbq_mark_toplevel(q, cl);
413 if (!cl->next_alive)
414 cbq_activate_class(cl);
415 return ret;
416 }
417
418 sch->qstats.drops++;
419 cbq_mark_toplevel(q, cl);
420 cl->qstats.drops++;
421 return ret;
422}
423
424static int
425cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
426{
427 struct cbq_sched_data *q = qdisc_priv(sch);
428 struct cbq_class *cl;
429 int ret;
430
431 if ((cl = q->tx_class) == NULL) {
432 kfree_skb(skb);
433 sch->qstats.drops++;
434 return NET_XMIT_CN;
435 }
436 q->tx_class = NULL;
437
438 cbq_mark_toplevel(q, cl);
439
440#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
441 q->rx_class = cl;
442 cl->q->__parent = sch;
443#endif
444 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
445 sch->q.qlen++;
446 sch->qstats.requeues++;
447 if (!cl->next_alive)
448 cbq_activate_class(cl);
449 return 0;
450 }
451 sch->qstats.drops++;
452 cl->qstats.drops++;
453 return ret;
454}
455
456/* Overlimit actions */
457
458/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
459
460static void cbq_ovl_classic(struct cbq_class *cl)
461{
462 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
463 psched_tdiff_t delay = cl->undertime - q->now;
464
465 if (!cl->delayed) {
466 delay += cl->offtime;
467
468 /*
469 The class goes to sleep, so that it will have no
470 chance to accumulate avgidle. Let's forgive it 8)
471
472 BTW cbq-2.0 has a bug in this place,
473 apparently they forgot to shift it by cl->ewma_log.
474 */
475 if (cl->avgidle < 0)
476 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
477 if (cl->avgidle < cl->minidle)
478 cl->avgidle = cl->minidle;
479 if (delay <= 0)
480 delay = 1;
481 cl->undertime = q->now + delay;
482
483 cl->xstats.overactions++;
484 cl->delayed = 1;
485 }
486 if (q->wd_expires == 0 || q->wd_expires > delay)
487 q->wd_expires = delay;
488
489 /* Dirty work! We must schedule wakeups based on
490 real available rate, rather than leaf rate,
491 which may be tiny (even zero).
492 */
493 if (q->toplevel == TC_CBQ_MAXLEVEL) {
494 struct cbq_class *b;
495 psched_tdiff_t base_delay = q->wd_expires;
496
497 for (b = cl->borrow; b; b = b->borrow) {
498 delay = b->undertime - q->now;
499 if (delay < base_delay) {
500 if (delay <= 0)
501 delay = 1;
502 base_delay = delay;
503 }
504 }
505
506 q->wd_expires = base_delay;
507 }
508}
509
510/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
511 they go overlimit
512 */
513
514static void cbq_ovl_rclassic(struct cbq_class *cl)
515{
516 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
517 struct cbq_class *this = cl;
518
519 do {
520 if (cl->level > q->toplevel) {
521 cl = NULL;
522 break;
523 }
524 } while ((cl = cl->borrow) != NULL);
525
526 if (cl == NULL)
527 cl = this;
528 cbq_ovl_classic(cl);
529}
530
531/* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
532
533static void cbq_ovl_delay(struct cbq_class *cl)
534{
535 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
536 psched_tdiff_t delay = cl->undertime - q->now;
537
538 if (!cl->delayed) {
539 psched_time_t sched = q->now;
540 ktime_t expires;
541
542 delay += cl->offtime;
543 if (cl->avgidle < 0)
544 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
545 if (cl->avgidle < cl->minidle)
546 cl->avgidle = cl->minidle;
547 cl->undertime = q->now + delay;
548
549 if (delay > 0) {
550 sched += delay + cl->penalty;
551 cl->penalized = sched;
552 cl->cpriority = TC_CBQ_MAXPRIO;
553 q->pmask |= (1<<TC_CBQ_MAXPRIO);
554
555 expires = ktime_set(0, 0);
556 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
557 if (hrtimer_try_to_cancel(&q->delay_timer) &&
558 ktime_to_ns(ktime_sub(q->delay_timer.expires,
559 expires)) > 0)
560 q->delay_timer.expires = expires;
561 hrtimer_restart(&q->delay_timer);
562 cl->delayed = 1;
563 cl->xstats.overactions++;
564 return;
565 }
566 delay = 1;
567 }
568 if (q->wd_expires == 0 || q->wd_expires > delay)
569 q->wd_expires = delay;
570}
571
572/* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
573
574static void cbq_ovl_lowprio(struct cbq_class *cl)
575{
576 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
577
578 cl->penalized = q->now + cl->penalty;
579
580 if (cl->cpriority != cl->priority2) {
581 cl->cpriority = cl->priority2;
582 q->pmask |= (1<<cl->cpriority);
583 cl->xstats.overactions++;
584 }
585 cbq_ovl_classic(cl);
586}
587
588/* TC_CBQ_OVL_DROP: penalize class by dropping */
589
590static void cbq_ovl_drop(struct cbq_class *cl)
591{
592 if (cl->q->ops->drop)
593 if (cl->q->ops->drop(cl->q))
594 cl->qdisc->q.qlen--;
595 cl->xstats.overactions++;
596 cbq_ovl_classic(cl);
597}
598
599static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
600 psched_time_t now)
601{
602 struct cbq_class *cl;
603 struct cbq_class *cl_prev = q->active[prio];
604 psched_time_t sched = now;
605
606 if (cl_prev == NULL)
607 return 0;
608
609 do {
610 cl = cl_prev->next_alive;
611 if (now - cl->penalized > 0) {
612 cl_prev->next_alive = cl->next_alive;
613 cl->next_alive = NULL;
614 cl->cpriority = cl->priority;
615 cl->delayed = 0;
616 cbq_activate_class(cl);
617
618 if (cl == q->active[prio]) {
619 q->active[prio] = cl_prev;
620 if (cl == q->active[prio]) {
621 q->active[prio] = NULL;
622 return 0;
623 }
624 }
625
626 cl = cl_prev->next_alive;
627 } else if (sched - cl->penalized > 0)
628 sched = cl->penalized;
629 } while ((cl_prev = cl) != q->active[prio]);
630
631 return sched - now;
632}
633
634static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
635{
636 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
637 delay_timer);
638 struct Qdisc *sch = q->watchdog.qdisc;
639 psched_time_t now;
640 psched_tdiff_t delay = 0;
641 unsigned pmask;
642
643 now = psched_get_time();
644
645 pmask = q->pmask;
646 q->pmask = 0;
647
648 while (pmask) {
649 int prio = ffz(~pmask);
650 psched_tdiff_t tmp;
651
652 pmask &= ~(1<<prio);
653
654 tmp = cbq_undelay_prio(q, prio, now);
655 if (tmp > 0) {
656 q->pmask |= 1<<prio;
657 if (tmp < delay || delay == 0)
658 delay = tmp;
659 }
660 }
661
662 if (delay) {
663 ktime_t time;
664
665 time = ktime_set(0, 0);
666 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
667 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
668 }
669
670 sch->flags &= ~TCQ_F_THROTTLED;
671 netif_schedule(sch->dev);
672 return HRTIMER_NORESTART;
673}
674
675
676#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
677
678static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
679{
680 int len = skb->len;
681 struct Qdisc *sch = child->__parent;
682 struct cbq_sched_data *q = qdisc_priv(sch);
683 struct cbq_class *cl = q->rx_class;
684
685 q->rx_class = NULL;
686
687 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
688
689 cbq_mark_toplevel(q, cl);
690
691 q->rx_class = cl;
692 cl->q->__parent = sch;
693
694 if (cl->q->enqueue(skb, cl->q) == 0) {
695 sch->q.qlen++;
696 sch->bstats.packets++;
697 sch->bstats.bytes+=len;
698 if (!cl->next_alive)
699 cbq_activate_class(cl);
700 return 0;
701 }
702 sch->qstats.drops++;
703 return 0;
704 }
705
706 sch->qstats.drops++;
707 return -1;
708}
709#endif
710
711/*
712 This is a mission-critical procedure.
713
714 We "regenerate" the toplevel cutoff if the transmitting class
715 has backlog and is not regulated. This is not part of the
716 original CBQ description, but it looks more reasonable.
717 Probably it is wrong; this question needs further investigation.
718*/
719
720static __inline__ void
721cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
722 struct cbq_class *borrowed)
723{
724 if (cl && q->toplevel >= borrowed->level) {
725 if (cl->q->q.qlen > 1) {
726 do {
727 if (borrowed->undertime == PSCHED_PASTPERFECT) {
728 q->toplevel = borrowed->level;
729 return;
730 }
731 } while ((borrowed=borrowed->borrow) != NULL);
732 }
733#if 0
734 /* It is not necessary now. Uncommenting it
735 will save CPU cycles, but decrease fairness.
736 */
737 q->toplevel = TC_CBQ_MAXLEVEL;
738#endif
739 }
740}
741
742static void
743cbq_update(struct cbq_sched_data *q)
744{
745 struct cbq_class *this = q->tx_class;
746 struct cbq_class *cl = this;
747 int len = q->tx_len;
748
749 q->tx_class = NULL;
750
751 for ( ; cl; cl = cl->share) {
752 long avgidle = cl->avgidle;
753 long idle;
754
755 cl->bstats.packets++;
756 cl->bstats.bytes += len;
757
758 /*
759 (now - last) is total time between packet right edges.
760 (last_pktlen/rate) is "virtual" busy time, so that
761
762 idle = (now - last) - last_pktlen/rate
763 */
764
765 idle = q->now - cl->last;
766 if ((unsigned long)idle > 128*1024*1024) {
767 avgidle = cl->maxidle;
768 } else {
769 idle -= L2T(cl, len);
770
771 /* true_avgidle := (1-W)*true_avgidle + W*idle,
772 where W=2^{-ewma_log}. But cl->avgidle is scaled:
773 cl->avgidle == true_avgidle/W,
774 hence:
775 */
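/* Added worked example: with ewma_log == 5 (W == 1/32) and a scaled
   avgidle of 3200, avgidle >> 5 == 100; if the measured idle is also
   100 the update below leaves avgidle unchanged, i.e. the scaled
   average sits at equilibrium exactly when idle == avgidle >> ewma_log,
   matching true_avgidle == idle. */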
776 avgidle += idle - (avgidle>>cl->ewma_log);
777 }
778
779 if (avgidle <= 0) {
780 /* Overlimit or at-limit */
781
782 if (avgidle < cl->minidle)
783 avgidle = cl->minidle;
784
785 cl->avgidle = avgidle;
786
787 /* Calculate expected time, when this class
788 will be allowed to send.
789 It will occur, when:
790 (1-W)*true_avgidle + W*delay = 0, i.e.
791 idle = (1/W - 1)*(-true_avgidle)
792 or
793 idle = (1 - W)*(-cl->avgidle);
794 */
795 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
796
797 /*
798 That is not all.
799 To maintain the rate allocated to the class,
800 we add to undertime virtual clock,
801 necessary to complete transmitted packet.
802 (len/phys_bandwidth has been already passed
803 to the moment of cbq_update)
804 */
805
806 idle -= L2T(&q->link, len);
807 idle += L2T(cl, len);
808
809 cl->undertime = q->now + idle;
810 } else {
811 /* Underlimit */
812
813 cl->undertime = PSCHED_PASTPERFECT;
814 if (avgidle > cl->maxidle)
815 cl->avgidle = cl->maxidle;
816 else
817 cl->avgidle = avgidle;
818 }
819 cl->last = q->now;
820 }
821
822 cbq_update_toplevel(q, this, q->tx_borrowed);
823}
824
825static __inline__ struct cbq_class *
826cbq_under_limit(struct cbq_class *cl)
827{
828 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
829 struct cbq_class *this_cl = cl;
830
831 if (cl->tparent == NULL)
832 return cl;
833
834 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
835 cl->delayed = 0;
836 return cl;
837 }
838
839 do {
840 /* This is a very suspicious place. Currently the overlimit
841 action is generated for non-bounded classes
842 only if the link is completely congested.
843 Though this agrees with the ancestor-only paradigm,
844 it looks very stupid. In particular,
845 it means that this chunk of code will either
846 never be called or result in strong amplification
847 of burstiness. Dangerous, silly, but
848 no other solution exists.
849 */
850 if ((cl = cl->borrow) == NULL) {
851 this_cl->qstats.overlimits++;
852 this_cl->overlimit(this_cl);
853 return NULL;
854 }
855 if (cl->level > q->toplevel)
856 return NULL;
857 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
858
859 cl->delayed = 0;
860 return cl;
861}
862
863static __inline__ struct sk_buff *
864cbq_dequeue_prio(struct Qdisc *sch, int prio)
865{
866 struct cbq_sched_data *q = qdisc_priv(sch);
867 struct cbq_class *cl_tail, *cl_prev, *cl;
868 struct sk_buff *skb;
869 int deficit;
870
871 cl_tail = cl_prev = q->active[prio];
872 cl = cl_prev->next_alive;
873
874 do {
875 deficit = 0;
876
877 /* Start round */
878 do {
879 struct cbq_class *borrow = cl;
880
881 if (cl->q->q.qlen &&
882 (borrow = cbq_under_limit(cl)) == NULL)
883 goto skip_class;
884
885 if (cl->deficit <= 0) {
886 /* Class exhausted its allotment per
887 this round. Switch to the next one.
888 */
889 deficit = 1;
890 cl->deficit += cl->quantum;
891 goto next_class;
892 }
893
894 skb = cl->q->dequeue(cl->q);
895
896 /* Class did not give us any skb :-(
897 It could occur even if cl->q->q.qlen != 0
898 f.e. if cl->q == "tbf"
899 */
900 if (skb == NULL)
901 goto skip_class;
902
903 cl->deficit -= skb->len;
904 q->tx_class = cl;
905 q->tx_borrowed = borrow;
906 if (borrow != cl) {
907#ifndef CBQ_XSTATS_BORROWS_BYTES
908 borrow->xstats.borrows++;
909 cl->xstats.borrows++;
910#else
911 borrow->xstats.borrows += skb->len;
912 cl->xstats.borrows += skb->len;
913#endif
914 }
915 q->tx_len = skb->len;
916
917 if (cl->deficit <= 0) {
918 q->active[prio] = cl;
919 cl = cl->next_alive;
920 cl->deficit += cl->quantum;
921 }
922 return skb;
923
924skip_class:
925 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
926 /* Class is empty or penalized.
927 Unlink it from active chain.
928 */
929 cl_prev->next_alive = cl->next_alive;
930 cl->next_alive = NULL;
931
932 /* Did cl_tail point to it? */
933 if (cl == cl_tail) {
934 /* Repair it! */
935 cl_tail = cl_prev;
936
937 /* Was it the last class in this band? */
938 if (cl == cl_tail) {
939 /* Kill the band! */
940 q->active[prio] = NULL;
941 q->activemask &= ~(1<<prio);
942 if (cl->q->q.qlen)
943 cbq_activate_class(cl);
944 return NULL;
945 }
946
947 q->active[prio] = cl_tail;
948 }
949 if (cl->q->q.qlen)
950 cbq_activate_class(cl);
951
952 cl = cl_prev;
953 }
954
955next_class:
956 cl_prev = cl;
957 cl = cl->next_alive;
958 } while (cl_prev != cl_tail);
959 } while (deficit);
960
961 q->active[prio] = cl_prev;
962
963 return NULL;
964}
965
966static __inline__ struct sk_buff *
967cbq_dequeue_1(struct Qdisc *sch)
968{
969 struct cbq_sched_data *q = qdisc_priv(sch);
970 struct sk_buff *skb;
971 unsigned activemask;
972
973 activemask = q->activemask&0xFF;
974 while (activemask) {
975 int prio = ffz(~activemask);
976 activemask &= ~(1<<prio);
977 skb = cbq_dequeue_prio(sch, prio);
978 if (skb)
979 return skb;
980 }
981 return NULL;
982}
983
984static struct sk_buff *
985cbq_dequeue(struct Qdisc *sch)
986{
987 struct sk_buff *skb;
988 struct cbq_sched_data *q = qdisc_priv(sch);
989 psched_time_t now;
990 psched_tdiff_t incr;
991
992 now = psched_get_time();
993 incr = now - q->now_rt;
994
995 if (q->tx_class) {
996 psched_tdiff_t incr2;
997 /* Time integrator. We calculate EOS time
998 by adding expected packet transmission time.
999 If real time is greater, we warp artificial clock,
1000 so that:
1001
1002 cbq_time = max(real_time, work);
1003 */
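/* Added note: incr2 below is the time the just-sent packet occupies
   the wire at the configured link rate. q->now first advances by incr2,
   and the remaining real-time increment (incr - incr2) is clamped at
   zero, so over one dequeue q->now advances by max(incr, incr2) --
   exactly the max(real_time, work) rule stated above. */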
1004 incr2 = L2T(&q->link, q->tx_len);
1005 q->now += incr2;
1006 cbq_update(q);
1007 if ((incr -= incr2) < 0)
1008 incr = 0;
1009 }
1010 q->now += incr;
1011 q->now_rt = now;
1012
1013 for (;;) {
1014 q->wd_expires = 0;
1015
1016 skb = cbq_dequeue_1(sch);
1017 if (skb) {
1018 sch->q.qlen--;
1019 sch->flags &= ~TCQ_F_THROTTLED;
1020 return skb;
1021 }
1022
1023 /* All the classes are overlimit.
1024
1025 It is possible, if:
1026
1027 1. Scheduler is empty.
1028 2. Toplevel cutoff inhibited borrowing.
1029 3. Root class is overlimit.
1030
1031 Reset the 2nd and 3rd conditions and retry.
1032
1033 Note that NS and cbq-2.0 are buggy here: picking
1034 an arbitrary class is appropriate for ancestor-only
1035 sharing, but not for the toplevel algorithm.
1036
1037 Our version is better but slower, because it requires
1038 two passes; this is unavoidable with top-level sharing.
1039 */
1040
1041 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1042 q->link.undertime == PSCHED_PASTPERFECT)
1043 break;
1044
1045 q->toplevel = TC_CBQ_MAXLEVEL;
1046 q->link.undertime = PSCHED_PASTPERFECT;
1047 }
1048
1049 /* No packets in scheduler or nobody wants to give them to us :-(
1050 Sigh... start watchdog timer in the last case. */
1051
1052 if (sch->q.qlen) {
1053 sch->qstats.overlimits++;
1054 if (q->wd_expires)
1055 qdisc_watchdog_schedule(&q->watchdog,
1056 now + q->wd_expires);
1057 }
1058 return NULL;
1059}
1060
1061/* CBQ class maintenance routines */
1062
1063static void cbq_adjust_levels(struct cbq_class *this)
1064{
1065 if (this == NULL)
1066 return;
1067
1068 do {
1069 int level = 0;
1070 struct cbq_class *cl;
1071
1072 if ((cl = this->children) != NULL) {
1073 do {
1074 if (cl->level > level)
1075 level = cl->level;
1076 } while ((cl = cl->sibling) != this->children);
1077 }
1078 this->level = level+1;
1079 } while ((this = this->tparent) != NULL);
1080}
1081
1082static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1083{
1084 struct cbq_class *cl;
1085 unsigned h;
1086
1087 if (q->quanta[prio] == 0)
1088 return;
1089
1090 for (h=0; h<16; h++) {
1091 for (cl = q->classes[h]; cl; cl = cl->next) {
1092 /* BUGGGG... Beware! This expression suffer of
1093 arithmetic overflows!
1094 */
1095 if (cl->priority == prio) {
1096 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1097 q->quanta[prio];
1098 }
1099 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1100 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1101 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1102 }
1103 }
1104 }
1105}
1106
1107static void cbq_sync_defmap(struct cbq_class *cl)
1108{
1109 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1110 struct cbq_class *split = cl->split;
1111 unsigned h;
1112 int i;
1113
1114 if (split == NULL)
1115 return;
1116
1117 for (i=0; i<=TC_PRIO_MAX; i++) {
1118 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1119 split->defaults[i] = NULL;
1120 }
1121
1122 for (i=0; i<=TC_PRIO_MAX; i++) {
1123 int level = split->level;
1124
1125 if (split->defaults[i])
1126 continue;
1127
1128 for (h=0; h<16; h++) {
1129 struct cbq_class *c;
1130
1131 for (c = q->classes[h]; c; c = c->next) {
1132 if (c->split == split && c->level < level &&
1133 c->defmap&(1<<i)) {
1134 split->defaults[i] = c;
1135 level = c->level;
1136 }
1137 }
1138 }
1139 }
1140}
1141
1142static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1143{
1144 struct cbq_class *split = NULL;
1145
1146 if (splitid == 0) {
1147 if ((split = cl->split) == NULL)
1148 return;
1149 splitid = split->classid;
1150 }
1151
1152 if (split == NULL || split->classid != splitid) {
1153 for (split = cl->tparent; split; split = split->tparent)
1154 if (split->classid == splitid)
1155 break;
1156 }
1157
1158 if (split == NULL)
1159 return;
1160
1161 if (cl->split != split) {
1162 cl->defmap = 0;
1163 cbq_sync_defmap(cl);
1164 cl->split = split;
1165 cl->defmap = def&mask;
1166 } else
1167 cl->defmap = (cl->defmap&~mask)|(def&mask);
1168
1169 cbq_sync_defmap(cl);
1170}
1171
1172static void cbq_unlink_class(struct cbq_class *this)
1173{
1174 struct cbq_class *cl, **clp;
1175 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1176
1177 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1178 if (cl == this) {
1179 *clp = cl->next;
1180 cl->next = NULL;
1181 break;
1182 }
1183 }
1184
1185 if (this->tparent) {
1186 clp=&this->sibling;
1187 cl = *clp;
1188 do {
1189 if (cl == this) {
1190 *clp = cl->sibling;
1191 break;
1192 }
1193 clp = &cl->sibling;
1194 } while ((cl = *clp) != this->sibling);
1195
1196 if (this->tparent->children == this) {
1197 this->tparent->children = this->sibling;
1198 if (this->sibling == this)
1199 this->tparent->children = NULL;
1200 }
1201 } else {
1202 BUG_TRAP(this->sibling == this);
1203 }
1204}
1205
1206static void cbq_link_class(struct cbq_class *this)
1207{
1208 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1209 unsigned h = cbq_hash(this->classid);
1210 struct cbq_class *parent = this->tparent;
1211
1212 this->sibling = this;
1213 this->next = q->classes[h];
1214 q->classes[h] = this;
1215
1216 if (parent == NULL)
1217 return;
1218
1219 if (parent->children == NULL) {
1220 parent->children = this;
1221 } else {
1222 this->sibling = parent->children->sibling;
1223 parent->children->sibling = this;
1224 }
1225}
1226
1227static unsigned int cbq_drop(struct Qdisc* sch)
1228{
1229 struct cbq_sched_data *q = qdisc_priv(sch);
1230 struct cbq_class *cl, *cl_head;
1231 int prio;
1232 unsigned int len;
1233
1234 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1235 if ((cl_head = q->active[prio]) == NULL)
1236 continue;
1237
1238 cl = cl_head;
1239 do {
1240 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1241 sch->q.qlen--;
1242 if (!cl->q->q.qlen)
1243 cbq_deactivate_class(cl);
1244 return len;
1245 }
1246 } while ((cl = cl->next_alive) != cl_head);
1247 }
1248 return 0;
1249}
1250
1251static void
1252cbq_reset(struct Qdisc* sch)
1253{
1254 struct cbq_sched_data *q = qdisc_priv(sch);
1255 struct cbq_class *cl;
1256 int prio;
1257 unsigned h;
1258
1259 q->activemask = 0;
1260 q->pmask = 0;
1261 q->tx_class = NULL;
1262 q->tx_borrowed = NULL;
1263 qdisc_watchdog_cancel(&q->watchdog);
1264 hrtimer_cancel(&q->delay_timer);
1265 q->toplevel = TC_CBQ_MAXLEVEL;
1266 q->now = psched_get_time();
1267 q->now_rt = q->now;
1268
1269 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1270 q->active[prio] = NULL;
1271
1272 for (h = 0; h < 16; h++) {
1273 for (cl = q->classes[h]; cl; cl = cl->next) {
1274 qdisc_reset(cl->q);
1275
1276 cl->next_alive = NULL;
1277 cl->undertime = PSCHED_PASTPERFECT;
1278 cl->avgidle = cl->maxidle;
1279 cl->deficit = cl->quantum;
1280 cl->cpriority = cl->priority;
1281 }
1282 }
1283 sch->q.qlen = 0;
1284}
1285
1286
1287static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1288{
1289 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1290 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1291 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1292 }
1293 if (lss->change&TCF_CBQ_LSS_EWMA)
1294 cl->ewma_log = lss->ewma_log;
1295 if (lss->change&TCF_CBQ_LSS_AVPKT)
1296 cl->avpkt = lss->avpkt;
1297 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1298 cl->minidle = -(long)lss->minidle;
1299 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1300 cl->maxidle = lss->maxidle;
1301 cl->avgidle = lss->maxidle;
1302 }
1303 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1304 cl->offtime = lss->offtime;
1305 return 0;
1306}
1307
1308static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1309{
1310 q->nclasses[cl->priority]--;
1311 q->quanta[cl->priority] -= cl->weight;
1312 cbq_normalize_quanta(q, cl->priority);
1313}
1314
1315static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1316{
1317 q->nclasses[cl->priority]++;
1318 q->quanta[cl->priority] += cl->weight;
1319 cbq_normalize_quanta(q, cl->priority);
1320}
1321
1322static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1323{
1324 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1325
1326 if (wrr->allot)
1327 cl->allot = wrr->allot;
1328 if (wrr->weight)
1329 cl->weight = wrr->weight;
1330 if (wrr->priority) {
1331 cl->priority = wrr->priority-1;
1332 cl->cpriority = cl->priority;
1333 if (cl->priority >= cl->priority2)
1334 cl->priority2 = TC_CBQ_MAXPRIO-1;
1335 }
1336
1337 cbq_addprio(q, cl);
1338 return 0;
1339}
1340
1341static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1342{
1343 switch (ovl->strategy) {
1344 case TC_CBQ_OVL_CLASSIC:
1345 cl->overlimit = cbq_ovl_classic;
1346 break;
1347 case TC_CBQ_OVL_DELAY:
1348 cl->overlimit = cbq_ovl_delay;
1349 break;
1350 case TC_CBQ_OVL_LOWPRIO:
1351 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1352 ovl->priority2-1 <= cl->priority)
1353 return -EINVAL;
1354 cl->priority2 = ovl->priority2-1;
1355 cl->overlimit = cbq_ovl_lowprio;
1356 break;
1357 case TC_CBQ_OVL_DROP:
1358 cl->overlimit = cbq_ovl_drop;
1359 break;
1360 case TC_CBQ_OVL_RCLASSIC:
1361 cl->overlimit = cbq_ovl_rclassic;
1362 break;
1363 default:
1364 return -EINVAL;
1365 }
1366 cl->penalty = ovl->penalty;
1367 return 0;
1368}
1369
1370#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1371static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1372{
1373 cl->police = p->police;
1374
1375 if (cl->q->handle) {
1376 if (p->police == TC_POLICE_RECLASSIFY)
1377 cl->q->reshape_fail = cbq_reshape_fail;
1378 else
1379 cl->q->reshape_fail = NULL;
1380 }
1381 return 0;
1382}
1383#endif
1384
1385static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1386{
1387 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1388 return 0;
1389}
1390
1391static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1392{
1393 struct cbq_sched_data *q = qdisc_priv(sch);
1394 struct rtattr *tb[TCA_CBQ_MAX];
1395 struct tc_ratespec *r;
1396
1397 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1398 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1399 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1400 return -EINVAL;
1401
1402 if (tb[TCA_CBQ_LSSOPT-1] &&
1403 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1404 return -EINVAL;
1405
1406 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1407
1408 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1409 return -EINVAL;
1410
1411 q->link.refcnt = 1;
1412 q->link.sibling = &q->link;
1413 q->link.classid = sch->handle;
1414 q->link.qdisc = sch;
1415 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1416 sch->handle)))
1417 q->link.q = &noop_qdisc;
1418
1419 q->link.priority = TC_CBQ_MAXPRIO-1;
1420 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1421 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1422 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1423 q->link.overlimit = cbq_ovl_classic;
1424 q->link.allot = psched_mtu(sch->dev);
1425 q->link.quantum = q->link.allot;
1426 q->link.weight = q->link.R_tab->rate.rate;
1427
1428 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1429 q->link.avpkt = q->link.allot/2;
1430 q->link.minidle = -0x7FFFFFFF;
1431
1432 qdisc_watchdog_init(&q->watchdog, sch);
1433 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1434 q->delay_timer.function = cbq_undelay;
1435 q->toplevel = TC_CBQ_MAXLEVEL;
1436 q->now = psched_get_time();
1437 q->now_rt = q->now;
1438
1439 cbq_link_class(&q->link);
1440
1441 if (tb[TCA_CBQ_LSSOPT-1])
1442 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1443
1444 cbq_addprio(q, &q->link);
1445 return 0;
1446}
1447
1448static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1449{
1450 unsigned char *b = skb_tail_pointer(skb);
1451
1452 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1453 return skb->len;
1454
1455rtattr_failure:
1456 nlmsg_trim(skb, b);
1457 return -1;
1458}
1459
1460static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1461{
1462 unsigned char *b = skb_tail_pointer(skb);
1463 struct tc_cbq_lssopt opt;
1464
1465 opt.flags = 0;
1466 if (cl->borrow == NULL)
1467 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1468 if (cl->share == NULL)
1469 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1470 opt.ewma_log = cl->ewma_log;
1471 opt.level = cl->level;
1472 opt.avpkt = cl->avpkt;
1473 opt.maxidle = cl->maxidle;
1474 opt.minidle = (u32)(-cl->minidle);
1475 opt.offtime = cl->offtime;
1476 opt.change = ~0;
1477 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1478 return skb->len;
1479
1480rtattr_failure:
1481 nlmsg_trim(skb, b);
1482 return -1;
1483}
1484
1485static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1486{
1487 unsigned char *b = skb_tail_pointer(skb);
1488 struct tc_cbq_wrropt opt;
1489
1490 opt.flags = 0;
1491 opt.allot = cl->allot;
1492 opt.priority = cl->priority+1;
1493 opt.cpriority = cl->cpriority+1;
1494 opt.weight = cl->weight;
1495 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1496 return skb->len;
1497
1498rtattr_failure:
1499 nlmsg_trim(skb, b);
1500 return -1;
1501}
1502
1503static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1504{
1505 unsigned char *b = skb_tail_pointer(skb);
1506 struct tc_cbq_ovl opt;
1507
1508 opt.strategy = cl->ovl_strategy;
1509 opt.priority2 = cl->priority2+1;
1510 opt.pad = 0;
1511 opt.penalty = cl->penalty;
1512 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1513 return skb->len;
1514
1515rtattr_failure:
1516 nlmsg_trim(skb, b);
1517 return -1;
1518}
1519
1520static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1521{
1522 unsigned char *b = skb_tail_pointer(skb);
1523 struct tc_cbq_fopt opt;
1524
1525 if (cl->split || cl->defmap) {
1526 opt.split = cl->split ? cl->split->classid : 0;
1527 opt.defmap = cl->defmap;
1528 opt.defchange = ~0;
1529 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1530 }
1531 return skb->len;
1532
1533rtattr_failure:
1534 nlmsg_trim(skb, b);
1535 return -1;
1536}
1537
1538#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1539static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1540{
1541 unsigned char *b = skb_tail_pointer(skb);
1542 struct tc_cbq_police opt;
1543
1544 if (cl->police) {
1545 opt.police = cl->police;
1546 opt.__res1 = 0;
1547 opt.__res2 = 0;
1548 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1549 }
1550 return skb->len;
1551
1552rtattr_failure:
1553 nlmsg_trim(skb, b);
1554 return -1;
1555}
1556#endif
1557
1558static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1559{
1560 if (cbq_dump_lss(skb, cl) < 0 ||
1561 cbq_dump_rate(skb, cl) < 0 ||
1562 cbq_dump_wrr(skb, cl) < 0 ||
1563 cbq_dump_ovl(skb, cl) < 0 ||
1564#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1565 cbq_dump_police(skb, cl) < 0 ||
1566#endif
1567 cbq_dump_fopt(skb, cl) < 0)
1568 return -1;
1569 return 0;
1570}
1571
1572static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1573{
1574 struct cbq_sched_data *q = qdisc_priv(sch);
1575 unsigned char *b = skb_tail_pointer(skb);
1576 struct rtattr *rta;
1577
1578 rta = (struct rtattr*)b;
1579 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1580 if (cbq_dump_attr(skb, &q->link) < 0)
1581 goto rtattr_failure;
1582 rta->rta_len = skb_tail_pointer(skb) - b;
1583 return skb->len;
1584
1585rtattr_failure:
1586 nlmsg_trim(skb, b);
1587 return -1;
1588}
1589
1590static int
1591cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1592{
1593 struct cbq_sched_data *q = qdisc_priv(sch);
1594
1595 q->link.xstats.avgidle = q->link.avgidle;
1596 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1597}
1598
1599static int
1600cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1601 struct sk_buff *skb, struct tcmsg *tcm)
1602{
1603 struct cbq_class *cl = (struct cbq_class*)arg;
1604 unsigned char *b = skb_tail_pointer(skb);
1605 struct rtattr *rta;
1606
1607 if (cl->tparent)
1608 tcm->tcm_parent = cl->tparent->classid;
1609 else
1610 tcm->tcm_parent = TC_H_ROOT;
1611 tcm->tcm_handle = cl->classid;
1612 tcm->tcm_info = cl->q->handle;
1613
1614 rta = (struct rtattr*)b;
1615 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1616 if (cbq_dump_attr(skb, cl) < 0)
1617 goto rtattr_failure;
1618 rta->rta_len = skb_tail_pointer(skb) - b;
1619 return skb->len;
1620
1621rtattr_failure:
1622 nlmsg_trim(skb, b);
1623 return -1;
1624}
1625
1626static int
1627cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1628 struct gnet_dump *d)
1629{
1630 struct cbq_sched_data *q = qdisc_priv(sch);
1631 struct cbq_class *cl = (struct cbq_class*)arg;
1632
1633 cl->qstats.qlen = cl->q->q.qlen;
1634 cl->xstats.avgidle = cl->avgidle;
1635 cl->xstats.undertime = 0;
1636
1637 if (cl->undertime != PSCHED_PASTPERFECT)
1638 cl->xstats.undertime = cl->undertime - q->now;
1639
1640 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1641 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1642 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1643 return -1;
1644
1645 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1646}
1647
1648static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1649 struct Qdisc **old)
1650{
1651 struct cbq_class *cl = (struct cbq_class*)arg;
1652
1653 if (cl) {
1654 if (new == NULL) {
1655 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1656 cl->classid)) == NULL)
1657 return -ENOBUFS;
1658 } else {
1659#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1660 if (cl->police == TC_POLICE_RECLASSIFY)
1661 new->reshape_fail = cbq_reshape_fail;
1662#endif
1663 }
1664 sch_tree_lock(sch);
1665 *old = xchg(&cl->q, new);
1666 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1667 qdisc_reset(*old);
1668 sch_tree_unlock(sch);
1669
1670 return 0;
1671 }
1672 return -ENOENT;
1673}
1674
1675static struct Qdisc *
1676cbq_leaf(struct Qdisc *sch, unsigned long arg)
1677{
1678 struct cbq_class *cl = (struct cbq_class*)arg;
1679
1680 return cl ? cl->q : NULL;
1681}
1682
1683static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1684{
1685 struct cbq_class *cl = (struct cbq_class *)arg;
1686
1687 if (cl->q->q.qlen == 0)
1688 cbq_deactivate_class(cl);
1689}
1690
1691static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1692{
1693 struct cbq_sched_data *q = qdisc_priv(sch);
1694 struct cbq_class *cl = cbq_class_lookup(q, classid);
1695
1696 if (cl) {
1697 cl->refcnt++;
1698 return (unsigned long)cl;
1699 }
1700 return 0;
1701}
1702
1703static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1704{
1705 struct cbq_sched_data *q = qdisc_priv(sch);
1706
1707 BUG_TRAP(!cl->filters);
1708
1709 tcf_destroy_chain(cl->filter_list);
1710 qdisc_destroy(cl->q);
1711 qdisc_put_rtab(cl->R_tab);
1712 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1713 if (cl != &q->link)
1714 kfree(cl);
1715}
1716
1717static void
1718cbq_destroy(struct Qdisc* sch)
1719{
1720 struct cbq_sched_data *q = qdisc_priv(sch);
1721 struct cbq_class *cl;
1722 unsigned h;
1723
1724#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1725 q->rx_class = NULL;
1726#endif
1727 /*
1728 * Filters must be destroyed first because we don't destroy the
1729 * classes from root to leafs which means that filters can still
1730 * be bound to classes which have been destroyed already. --TGR '04
1731 */
1732 for (h = 0; h < 16; h++) {
1733 for (cl = q->classes[h]; cl; cl = cl->next) {
1734 tcf_destroy_chain(cl->filter_list);
1735 cl->filter_list = NULL;
1736 }
1737 }
1738 for (h = 0; h < 16; h++) {
1739 struct cbq_class *next;
1740
1741 for (cl = q->classes[h]; cl; cl = next) {
1742 next = cl->next;
1743 cbq_destroy_class(sch, cl);
1744 }
1745 }
1746}
1747
1748static void cbq_put(struct Qdisc *sch, unsigned long arg)
1749{
1750 struct cbq_class *cl = (struct cbq_class*)arg;
1751
1752 if (--cl->refcnt == 0) {
1753#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1754 struct cbq_sched_data *q = qdisc_priv(sch);
1755
1756 spin_lock_bh(&sch->dev->queue_lock);
1757 if (q->rx_class == cl)
1758 q->rx_class = NULL;
1759 spin_unlock_bh(&sch->dev->queue_lock);
1760#endif
1761
1762 cbq_destroy_class(sch, cl);
1763 }
1764}
1765
1766static int
1767cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1768 unsigned long *arg)
1769{
1770 int err;
1771 struct cbq_sched_data *q = qdisc_priv(sch);
1772 struct cbq_class *cl = (struct cbq_class*)*arg;
1773 struct rtattr *opt = tca[TCA_OPTIONS-1];
1774 struct rtattr *tb[TCA_CBQ_MAX];
1775 struct cbq_class *parent;
1776 struct qdisc_rate_table *rtab = NULL;
1777
1778 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1779 return -EINVAL;
1780
1781 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1782 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1783 return -EINVAL;
1784
1785 if (tb[TCA_CBQ_FOPT-1] &&
1786 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1787 return -EINVAL;
1788
1789 if (tb[TCA_CBQ_RATE-1] &&
1790 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1791 return -EINVAL;
1792
1793 if (tb[TCA_CBQ_LSSOPT-1] &&
1794 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1795 return -EINVAL;
1796
1797 if (tb[TCA_CBQ_WRROPT-1] &&
1798 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1799 return -EINVAL;
1800
1801#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1802 if (tb[TCA_CBQ_POLICE-1] &&
1803 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1804 return -EINVAL;
1805#endif
1806
1807 if (cl) {
1808 /* Check parent */
1809 if (parentid) {
1810 if (cl->tparent && cl->tparent->classid != parentid)
1811 return -EINVAL;
1812 if (!cl->tparent && parentid != TC_H_ROOT)
1813 return -EINVAL;
1814 }
1815
1816 if (tb[TCA_CBQ_RATE-1]) {
1817 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1818 if (rtab == NULL)
1819 return -EINVAL;
1820 }
1821
1822 /* Change class parameters */
1823 sch_tree_lock(sch);
1824
1825 if (cl->next_alive != NULL)
1826 cbq_deactivate_class(cl);
1827
1828 if (rtab) {
1829 rtab = xchg(&cl->R_tab, rtab);
1830 qdisc_put_rtab(rtab);
1831 }
1832
1833 if (tb[TCA_CBQ_LSSOPT-1])
1834 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1835
1836 if (tb[TCA_CBQ_WRROPT-1]) {
1837 cbq_rmprio(q, cl);
1838 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1839 }
1840
1841 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1842 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1843
1844#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1845 if (tb[TCA_CBQ_POLICE-1])
1846 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1847#endif
1848
1849 if (tb[TCA_CBQ_FOPT-1])
1850 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1851
1852 if (cl->q->q.qlen)
1853 cbq_activate_class(cl);
1854
1855 sch_tree_unlock(sch);
1856
1857 if (tca[TCA_RATE-1])
1858 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1859 &sch->dev->queue_lock,
1860 tca[TCA_RATE-1]);
1861 return 0;
1862 }
1863
1864 if (parentid == TC_H_ROOT)
1865 return -EINVAL;
1866
1867 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1868 tb[TCA_CBQ_LSSOPT-1] == NULL)
1869 return -EINVAL;
1870
1871 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1872 if (rtab == NULL)
1873 return -EINVAL;
1874
1875 if (classid) {
1876 err = -EINVAL;
1877 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1878 goto failure;
1879 } else {
1880 int i;
1881 classid = TC_H_MAKE(sch->handle,0x8000);
1882
1883 for (i=0; i<0x8000; i++) {
1884 if (++q->hgenerator >= 0x8000)
1885 q->hgenerator = 1;
1886 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1887 break;
1888 }
1889 err = -ENOSR;
1890 if (i >= 0x8000)
1891 goto failure;
1892 classid = classid|q->hgenerator;
1893 }
1894
1895 parent = &q->link;
1896 if (parentid) {
1897 parent = cbq_class_lookup(q, parentid);
1898 err = -EINVAL;
1899 if (parent == NULL)
1900 goto failure;
1901 }
1902
1903 err = -ENOBUFS;
1904 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1905 if (cl == NULL)
1906 goto failure;
1907 cl->R_tab = rtab;
1908 rtab = NULL;
1909 cl->refcnt = 1;
1910 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1911 cl->q = &noop_qdisc;
1912 cl->classid = classid;
1913 cl->tparent = parent;
1914 cl->qdisc = sch;
1915 cl->allot = parent->allot;
1916 cl->quantum = cl->allot;
1917 cl->weight = cl->R_tab->rate.rate;
1918
1919 sch_tree_lock(sch);
1920 cbq_link_class(cl);
1921 cl->borrow = cl->tparent;
1922 if (cl->tparent != &q->link)
1923 cl->share = cl->tparent;
1924 cbq_adjust_levels(parent);
1925 cl->minidle = -0x7FFFFFFF;
1926 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1927 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1928 if (cl->ewma_log==0)
1929 cl->ewma_log = q->link.ewma_log;
1930 if (cl->maxidle==0)
1931 cl->maxidle = q->link.maxidle;
1932 if (cl->avpkt==0)
1933 cl->avpkt = q->link.avpkt;
1934 cl->overlimit = cbq_ovl_classic;
1935 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1936 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1937#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1938 if (tb[TCA_CBQ_POLICE-1])
1939 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1940#endif
1941 if (tb[TCA_CBQ_FOPT-1])
1942 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1943 sch_tree_unlock(sch);
1944
1945 if (tca[TCA_RATE-1])
1946 gen_new_estimator(&cl->bstats, &cl->rate_est,
1947 &sch->dev->queue_lock, tca[TCA_RATE-1]);
1948
1949 *arg = (unsigned long)cl;
1950 return 0;
1951
1952failure:
1953 qdisc_put_rtab(rtab);
1954 return err;
1955}
1956
1957static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1958{
1959 struct cbq_sched_data *q = qdisc_priv(sch);
1960 struct cbq_class *cl = (struct cbq_class*)arg;
1961 unsigned int qlen;
1962
1963 if (cl->filters || cl->children || cl == &q->link)
1964 return -EBUSY;
1965
1966 sch_tree_lock(sch);
1967
1968 qlen = cl->q->q.qlen;
1969 qdisc_reset(cl->q);
1970 qdisc_tree_decrease_qlen(cl->q, qlen);
1971
1972 if (cl->next_alive)
1973 cbq_deactivate_class(cl);
1974
1975 if (q->tx_borrowed == cl)
1976 q->tx_borrowed = q->tx_class;
1977 if (q->tx_class == cl) {
1978 q->tx_class = NULL;
1979 q->tx_borrowed = NULL;
1980 }
1981#if defined(CONFIG_NET_CLS_ACT) || defined(CONFIG_NET_CLS_POLICE)
1982 if (q->rx_class == cl)
1983 q->rx_class = NULL;
1984#endif
1985
1986 cbq_unlink_class(cl);
1987 cbq_adjust_levels(cl->tparent);
1988 cl->defmap = 0;
1989 cbq_sync_defmap(cl);
1990
1991 cbq_rmprio(q, cl);
1992 sch_tree_unlock(sch);
1993
1994 if (--cl->refcnt == 0)
1995 cbq_destroy_class(sch, cl);
1996
1997 return 0;
1998}
1999
2000static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2001{
2002 struct cbq_sched_data *q = qdisc_priv(sch);
2003 struct cbq_class *cl = (struct cbq_class *)arg;
2004
2005 if (cl == NULL)
2006 cl = &q->link;
2007
2008 return &cl->filter_list;
2009}
2010
2011static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2012 u32 classid)
2013{
2014 struct cbq_sched_data *q = qdisc_priv(sch);
2015 struct cbq_class *p = (struct cbq_class*)parent;
2016 struct cbq_class *cl = cbq_class_lookup(q, classid);
2017
2018 if (cl) {
2019 if (p && p->level <= cl->level)
2020 return 0;
2021 cl->filters++;
2022 return (unsigned long)cl;
2023 }
2024 return 0;
2025}
2026
2027static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2028{
2029 struct cbq_class *cl = (struct cbq_class*)arg;
2030
2031 cl->filters--;
2032}
2033
2034static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2035{
2036 struct cbq_sched_data *q = qdisc_priv(sch);
2037 unsigned h;
2038
2039 if (arg->stop)
2040 return;
2041
2042 for (h = 0; h < 16; h++) {
2043 struct cbq_class *cl;
2044
2045 for (cl = q->classes[h]; cl; cl = cl->next) {
2046 if (arg->count < arg->skip) {
2047 arg->count++;
2048 continue;
2049 }
2050 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2051 arg->stop = 1;
2052 return;
2053 }
2054 arg->count++;
2055 }
2056 }
2057}
2058
2059static struct Qdisc_class_ops cbq_class_ops = {
2060 .graft = cbq_graft,
2061 .leaf = cbq_leaf,
2062 .qlen_notify = cbq_qlen_notify,
2063 .get = cbq_get,
2064 .put = cbq_put,
2065 .change = cbq_change_class,
2066 .delete = cbq_delete,
2067 .walk = cbq_walk,
2068 .tcf_chain = cbq_find_tcf,
2069 .bind_tcf = cbq_bind_filter,
2070 .unbind_tcf = cbq_unbind_filter,
2071 .dump = cbq_dump_class,
2072 .dump_stats = cbq_dump_class_stats,
2073};
2074
2075static struct Qdisc_ops cbq_qdisc_ops = {
2076 .next = NULL,
2077 .cl_ops = &cbq_class_ops,
2078 .id = "cbq",
2079 .priv_size = sizeof(struct cbq_sched_data),
2080 .enqueue = cbq_enqueue,
2081 .dequeue = cbq_dequeue,
2082 .requeue = cbq_requeue,
2083 .drop = cbq_drop,
2084 .init = cbq_init,
2085 .reset = cbq_reset,
2086 .destroy = cbq_destroy,
2087 .change = NULL,
2088 .dump = cbq_dump,
2089 .dump_stats = cbq_dump_stats,
2090 .owner = THIS_MODULE,
2091};
2092
2093static int __init cbq_module_init(void)
2094{
2095 return register_qdisc(&cbq_qdisc_ops);
2096}
2097static void __exit cbq_module_exit(void)
2098{
2099 unregister_qdisc(&cbq_qdisc_ops);
2100}
2101module_init(cbq_module_init)
2102module_exit(cbq_module_exit)
2103MODULE_LICENSE("GPL");