Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input
[linux-2.6-block.git] / net / sched / sch_htb.c
CommitLineData
87990467 1/*
1da177e4
LT
2 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Martin Devera, <devik@cdi.cz>
10 *
11 * Credits (in time order) for older HTB versions:
12 * Stef Coene <stef.coene@docum.org>
13 * HTB support at LARTC mailing list
10297b99 14 * Ondrej Kraus, <krauso@barr.cz>
1da177e4
LT
15 * found missing INIT_QDISC(htb)
16 * Vladimir Smelhaus, Aamer Akhter, Bert Hubert
17 * helped a lot to locate nasty class stall bug
18 * Andi Kleen, Jamal Hadi, Bert Hubert
19 * code review and helpful comments on shaping
20 * Tomasz Wrona, <tw@eter.tym.pl>
21 * created test case so that I was able to fix nasty bug
22 * Wilfried Weissmann
23 * spotted bug in dequeue code and helped with fix
24 * Jiri Fojtasek
25 * fixed requeue routine
26 * and many others. thanks.
1da177e4 27 */
1da177e4 28#include <linux/module.h>
47083fc0 29#include <linux/moduleparam.h>
1da177e4
LT
30#include <linux/types.h>
31#include <linux/kernel.h>
1da177e4 32#include <linux/string.h>
1da177e4 33#include <linux/errno.h>
1da177e4
LT
34#include <linux/skbuff.h>
35#include <linux/list.h>
36#include <linux/compiler.h>
0ba48053 37#include <linux/rbtree.h>
1224736d 38#include <linux/workqueue.h>
5a0e3ad6 39#include <linux/slab.h>
dc5fc579 40#include <net/netlink.h>
292f1c7f 41#include <net/sch_generic.h>
1da177e4 42#include <net/pkt_sched.h>
1da177e4
LT
43
/* HTB algorithm.
    Author: devik@cdi.cz
    ========================================================================
    HTB is like TBF with multiple classes. It is also similar to CBQ because
    it allows a priority to be assigned to each class in the hierarchy.
    In fact it is another implementation of Floyd's formal sharing.

    Levels:
    Each class is assigned a level. A leaf ALWAYS has level 0 and root
    classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
    one less than their parent.
*/
56
47083fc0 57static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
87990467 58#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */
1da177e4
LT
59
60#if HTB_VER >> 16 != TC_HTB_PROTOVER
61#error "Mismatched sch_htb.c and pkt_sch.h"
62#endif
63
47083fc0
JDB
64/* Module parameter and sysfs export */
65module_param (htb_hysteresis, int, 0640);
66MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");
67
64153ce0
ED
68static int htb_rate_est = 0; /* htb classes have a default rate estimator */
69module_param(htb_rate_est, int, 0640);
70MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");
71
1da177e4
LT
/* Used internally to track the sending state of a single class. */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can neither send nor borrow */
	HTB_MAY_BORROW,		/* class can't send on its own but may borrow */
	HTB_CAN_SEND		/* class can send */
};
78
c9364636
ED
79struct htb_prio {
80 union {
81 struct rb_root row;
82 struct rb_root feed;
83 };
84 struct rb_node *ptr;
85 /* When class changes from state 1->2 and disconnects from
86 * parent's feed then we lost ptr value and start from the
87 * first child again. Here we store classid of the
88 * last valid ptr (used when ptr is NULL).
89 */
90 u32 last_ptr_id;
91};
92
ca4ec90b
ED
93/* interior & leaf nodes; props specific to leaves are marked L:
94 * To reduce false sharing, place mostly read fields at beginning,
95 * and mostly written ones at the end.
96 */
87990467 97struct htb_class {
f4c1f3e0 98 struct Qdisc_class_common common;
ca4ec90b
ED
99 struct psched_ratecfg rate;
100 struct psched_ratecfg ceil;
101 s64 buffer, cbuffer;/* token bucket depth/rate */
102 s64 mbuffer; /* max wait time */
cbd37556 103 u32 prio; /* these two are used only by leaves... */
ca4ec90b
ED
104 int quantum; /* but stored for parent-to-leaf return */
105
106 struct tcf_proto *filter_list; /* class attached filters */
107 int filter_cnt;
108 int refcnt; /* usage count of this class */
109
110 int level; /* our level (see above) */
111 unsigned int children;
112 struct htb_class *parent; /* parent class */
113
45203a3b 114 struct gnet_stats_rate_est64 rate_est;
1da177e4 115
ca4ec90b
ED
116 /*
117 * Written often fields
118 */
119 struct gnet_stats_basic_packed bstats;
120 struct gnet_stats_queue qstats;
121 struct tc_htb_xstats xstats; /* our special stats */
87990467 122
ca4ec90b
ED
123 /* token bucket parameters */
124 s64 tokens, ctokens;/* current number of tokens */
125 s64 t_c; /* checkpoint time */
c19f7a34 126
87990467
SH
127 union {
128 struct htb_class_leaf {
87990467 129 struct list_head drop_list;
c9364636
ED
130 int deficit[TC_HTB_MAXDEPTH];
131 struct Qdisc *q;
87990467
SH
132 } leaf;
133 struct htb_class_inner {
c9364636 134 struct htb_prio clprio[TC_HTB_NUMPRIO];
87990467
SH
135 } inner;
136 } un;
ca4ec90b 137 s64 pq_key;
87990467 138
ca4ec90b
ED
139 int prio_activity; /* for which prios are we active */
140 enum htb_cmode cmode; /* current mode of the class */
141 struct rb_node pq_node; /* node for event queue */
142 struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */
1da177e4
LT
143};
144
c9364636
ED
145struct htb_level {
146 struct rb_root wait_pq;
147 struct htb_prio hprio[TC_HTB_NUMPRIO];
148};
149
87990467 150struct htb_sched {
f4c1f3e0 151 struct Qdisc_class_hash clhash;
c9364636
ED
152 int defcls; /* class where unclassified flows go to */
153 int rate2quantum; /* quant = rate / rate2quantum */
1da177e4 154
c9364636
ED
155 /* filters for qdisc itself */
156 struct tcf_proto *filter_list;
1da177e4 157
c9364636
ED
158#define HTB_WARN_TOOMANYEVENTS 0x1
159 unsigned int warned; /* only one warning */
160 int direct_qlen;
161 struct work_struct work;
1da177e4 162
c9364636
ED
163 /* non shaped skbs; let them go directly thru */
164 struct sk_buff_head direct_queue;
165 long direct_pkts;
1da177e4 166
c9364636 167 struct qdisc_watchdog watchdog;
1da177e4 168
c9364636
ED
169 s64 now; /* cached dequeue time */
170 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
1da177e4 171
c9364636
ED
172 /* time of nearest event per level (row) */
173 s64 near_ev_cache[TC_HTB_MAXDEPTH];
87990467 174
c9364636 175 int row_mask[TC_HTB_MAXDEPTH];
e82181de 176
c9364636 177 struct htb_level hlevel[TC_HTB_MAXDEPTH];
1da177e4
LT
178};
179
1da177e4 180/* find class in global hash table using given handle */
87990467 181static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
1da177e4
LT
182{
183 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 184 struct Qdisc_class_common *clc;
0cef296d 185
f4c1f3e0
PM
186 clc = qdisc_class_find(&q->clhash, handle);
187 if (clc == NULL)
1da177e4 188 return NULL;
f4c1f3e0 189 return container_of(clc, struct htb_class, common);
1da177e4
LT
190}
191
192/**
193 * htb_classify - classify a packet into class
194 *
195 * It returns NULL if the packet should be dropped or -1 if the packet
196 * should be passed directly thru. In all other cases leaf class is returned.
197 * We allow direct class selection by classid in priority. The we examine
198 * filters in qdisc and in inner nodes (if higher filter points to the inner
199 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
10297b99 200 * internal fifo (direct). These packets then go directly thru. If we still
25985edc 201 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
1da177e4
LT
202 * then finish and return direct queue.
203 */
cc7ec456 204#define HTB_DIRECT ((struct htb_class *)-1L)
1da177e4 205
87990467
SH
206static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
207 int *qerr)
1da177e4
LT
208{
209 struct htb_sched *q = qdisc_priv(sch);
210 struct htb_class *cl;
211 struct tcf_result res;
212 struct tcf_proto *tcf;
213 int result;
214
215 /* allow to select class by setting skb->priority to valid classid;
cc7ec456
ED
216 * note that nfmark can be used too by attaching filter fw with no
217 * rules in it
218 */
1da177e4 219 if (skb->priority == sch->handle)
87990467 220 return HTB_DIRECT; /* X:0 (direct flow) selected */
cc7ec456 221 cl = htb_find(skb->priority, sch);
29824310
HM
222 if (cl) {
223 if (cl->level == 0)
224 return cl;
225 /* Start with inner filter chain if a non-leaf class is selected */
226 tcf = cl->filter_list;
227 } else {
228 tcf = q->filter_list;
229 }
1da177e4 230
c27f339a 231 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
1da177e4
LT
232 while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
233#ifdef CONFIG_NET_CLS_ACT
234 switch (result) {
235 case TC_ACT_QUEUED:
87990467 236 case TC_ACT_STOLEN:
378a2f09 237 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
1da177e4
LT
238 case TC_ACT_SHOT:
239 return NULL;
240 }
1da177e4 241#endif
cc7ec456
ED
242 cl = (void *)res.class;
243 if (!cl) {
1da177e4 244 if (res.classid == sch->handle)
87990467 245 return HTB_DIRECT; /* X:0 (direct flow) */
cc7ec456
ED
246 cl = htb_find(res.classid, sch);
247 if (!cl)
87990467 248 break; /* filter selected invalid classid */
1da177e4
LT
249 }
250 if (!cl->level)
87990467 251 return cl; /* we hit leaf; return it */
1da177e4
LT
252
253 /* we have got inner class; apply inner filter chain */
254 tcf = cl->filter_list;
255 }
256 /* classification failed; try to use default class */
87990467 257 cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
1da177e4 258 if (!cl || cl->level)
87990467 259 return HTB_DIRECT; /* bad default .. this is safe bet */
1da177e4
LT
260 return cl;
261}
262
1da177e4
LT
263/**
264 * htb_add_to_id_tree - adds class to the round robin list
265 *
266 * Routine adds class to the list (actually tree) sorted by classid.
267 * Make sure that class is not already on such list for given prio.
268 */
87990467
SH
269static void htb_add_to_id_tree(struct rb_root *root,
270 struct htb_class *cl, int prio)
1da177e4
LT
271{
272 struct rb_node **p = &root->rb_node, *parent = NULL;
3bf72957 273
1da177e4 274 while (*p) {
87990467
SH
275 struct htb_class *c;
276 parent = *p;
1da177e4 277 c = rb_entry(parent, struct htb_class, node[prio]);
3bf72957 278
f4c1f3e0 279 if (cl->common.classid > c->common.classid)
1da177e4 280 p = &parent->rb_right;
87990467 281 else
1da177e4
LT
282 p = &parent->rb_left;
283 }
284 rb_link_node(&cl->node[prio], parent, p);
285 rb_insert_color(&cl->node[prio], root);
286}
287
288/**
289 * htb_add_to_wait_tree - adds class to the event queue with delay
290 *
291 * The class is added to priority event queue to indicate that class will
292 * change its mode in cl->pq_key microseconds. Make sure that class is not
293 * already in the queue.
294 */
87990467 295static void htb_add_to_wait_tree(struct htb_sched *q,
56b765b7 296 struct htb_class *cl, s64 delay)
1da177e4 297{
c9364636 298 struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;
3bf72957 299
fb983d45
PM
300 cl->pq_key = q->now + delay;
301 if (cl->pq_key == q->now)
1da177e4
LT
302 cl->pq_key++;
303
304 /* update the nearest event cache */
fb983d45 305 if (q->near_ev_cache[cl->level] > cl->pq_key)
1da177e4 306 q->near_ev_cache[cl->level] = cl->pq_key;
87990467 307
1da177e4 308 while (*p) {
87990467
SH
309 struct htb_class *c;
310 parent = *p;
1da177e4 311 c = rb_entry(parent, struct htb_class, pq_node);
fb983d45 312 if (cl->pq_key >= c->pq_key)
1da177e4 313 p = &parent->rb_right;
87990467 314 else
1da177e4
LT
315 p = &parent->rb_left;
316 }
317 rb_link_node(&cl->pq_node, parent, p);
c9364636 318 rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
1da177e4
LT
319}
320
/**
 * htb_next_rb_node - advance a node pointer to the next node in the tree
 * @n: the current node in the binary tree
 *
 * Replaces *n with the result of rb_next(*n); once past the last key
 * this leaves *n NULL.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	struct rb_node *succ = rb_next(*n);

	*n = succ;
}
331
332/**
333 * htb_add_class_to_row - add class to its row
334 *
335 * The class is added to row at priorities marked in mask.
336 * It does nothing if mask == 0.
337 */
87990467
SH
338static inline void htb_add_class_to_row(struct htb_sched *q,
339 struct htb_class *cl, int mask)
1da177e4 340{
1da177e4
LT
341 q->row_mask[cl->level] |= mask;
342 while (mask) {
343 int prio = ffz(~mask);
344 mask &= ~(1 << prio);
c9364636 345 htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
1da177e4
LT
346 }
347}
348
3696f625
SH
/* Erase rb from root and mark it as empty. Hitting an already-empty node
 * indicates a bug in this code, but it need not be fatal: warn and skip.
 */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
		return;
	}

	rb_erase(rb, root);
	RB_CLEAR_NODE(rb);
}
359
360
1da177e4
LT
361/**
362 * htb_remove_class_from_row - removes class from its row
363 *
364 * The class is removed from row at priorities marked in mask.
365 * It does nothing if mask == 0.
366 */
87990467
SH
367static inline void htb_remove_class_from_row(struct htb_sched *q,
368 struct htb_class *cl, int mask)
1da177e4
LT
369{
370 int m = 0;
c9364636 371 struct htb_level *hlevel = &q->hlevel[cl->level];
3bf72957 372
1da177e4
LT
373 while (mask) {
374 int prio = ffz(~mask);
c9364636 375 struct htb_prio *hprio = &hlevel->hprio[prio];
3696f625 376
1da177e4 377 mask &= ~(1 << prio);
c9364636
ED
378 if (hprio->ptr == cl->node + prio)
379 htb_next_rb_node(&hprio->ptr);
3696f625 380
c9364636
ED
381 htb_safe_rb_erase(cl->node + prio, &hprio->row);
382 if (!hprio->row.rb_node)
1da177e4
LT
383 m |= 1 << prio;
384 }
1da177e4
LT
385 q->row_mask[cl->level] &= ~m;
386}
387
388/**
389 * htb_activate_prios - creates active classe's feed chain
390 *
391 * The class is connected to ancestors and/or appropriate rows
10297b99 392 * for priorities it is participating on. cl->cmode must be new
1da177e4
LT
393 * (activated) mode. It does nothing if cl->prio_activity == 0.
394 */
87990467 395static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
1da177e4
LT
396{
397 struct htb_class *p = cl->parent;
87990467 398 long m, mask = cl->prio_activity;
1da177e4
LT
399
400 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
401 m = mask;
402 while (m) {
1da177e4
LT
403 int prio = ffz(~m);
404 m &= ~(1 << prio);
87990467 405
c9364636 406 if (p->un.inner.clprio[prio].feed.rb_node)
1da177e4 407 /* parent already has its feed in use so that
cc7ec456
ED
408 * reset bit in mask as parent is already ok
409 */
1da177e4 410 mask &= ~(1 << prio);
87990467 411
c9364636 412 htb_add_to_id_tree(&p->un.inner.clprio[prio].feed, cl, prio);
1da177e4 413 }
1da177e4 414 p->prio_activity |= mask;
87990467
SH
415 cl = p;
416 p = cl->parent;
3bf72957 417
1da177e4
LT
418 }
419 if (cl->cmode == HTB_CAN_SEND && mask)
87990467 420 htb_add_class_to_row(q, cl, mask);
1da177e4
LT
421}
422
423/**
424 * htb_deactivate_prios - remove class from feed chain
425 *
10297b99 426 * cl->cmode must represent old mode (before deactivation). It does
1da177e4
LT
427 * nothing if cl->prio_activity == 0. Class is removed from all feed
428 * chains and rows.
429 */
430static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
431{
432 struct htb_class *p = cl->parent;
87990467 433 long m, mask = cl->prio_activity;
1da177e4
LT
434
435 while (cl->cmode == HTB_MAY_BORROW && p && mask) {
87990467
SH
436 m = mask;
437 mask = 0;
1da177e4
LT
438 while (m) {
439 int prio = ffz(~m);
440 m &= ~(1 << prio);
87990467 441
c9364636 442 if (p->un.inner.clprio[prio].ptr == cl->node + prio) {
1da177e4 443 /* we are removing child which is pointed to from
cc7ec456
ED
444 * parent feed - forget the pointer but remember
445 * classid
446 */
c9364636
ED
447 p->un.inner.clprio[prio].last_ptr_id = cl->common.classid;
448 p->un.inner.clprio[prio].ptr = NULL;
1da177e4 449 }
87990467 450
c9364636
ED
451 htb_safe_rb_erase(cl->node + prio,
452 &p->un.inner.clprio[prio].feed);
87990467 453
c9364636 454 if (!p->un.inner.clprio[prio].feed.rb_node)
1da177e4
LT
455 mask |= 1 << prio;
456 }
3bf72957 457
1da177e4 458 p->prio_activity &= ~mask;
87990467
SH
459 cl = p;
460 p = cl->parent;
3bf72957 461
1da177e4 462 }
87990467
SH
463 if (cl->cmode == HTB_CAN_SEND && mask)
464 htb_remove_class_from_row(q, cl, mask);
1da177e4
LT
465}
466
56b765b7 467static inline s64 htb_lowater(const struct htb_class *cl)
18a63e86 468{
47083fc0
JDB
469 if (htb_hysteresis)
470 return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
471 else
472 return 0;
18a63e86 473}
56b765b7 474static inline s64 htb_hiwater(const struct htb_class *cl)
18a63e86 475{
47083fc0
JDB
476 if (htb_hysteresis)
477 return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
478 else
479 return 0;
18a63e86 480}
47083fc0 481
18a63e86 482
1da177e4
LT
483/**
484 * htb_class_mode - computes and returns current class mode
485 *
486 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
487 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
10297b99 488 * from now to time when cl will change its state.
1da177e4 489 * Also it is worth to note that class mode doesn't change simply
10297b99 490 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
1da177e4
LT
491 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
492 * mode transitions per time unit. The speed gain is about 1/6.
493 */
87990467 494static inline enum htb_cmode
56b765b7 495htb_class_mode(struct htb_class *cl, s64 *diff)
1da177e4 496{
56b765b7 497 s64 toks;
1da177e4 498
87990467
SH
499 if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
500 *diff = -toks;
501 return HTB_CANT_SEND;
502 }
18a63e86 503
87990467
SH
504 if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
505 return HTB_CAN_SEND;
1da177e4 506
87990467
SH
507 *diff = -toks;
508 return HTB_MAY_BORROW;
1da177e4
LT
509}
510
511/**
512 * htb_change_class_mode - changes classe's mode
513 *
514 * This should be the only way how to change classe's mode under normal
515 * cirsumstances. Routine will update feed lists linkage, change mode
516 * and add class to the wait event queue if appropriate. New mode should
517 * be different from old one and cl->pq_key has to be valid if changing
518 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
519 */
87990467 520static void
56b765b7 521htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
87990467
SH
522{
523 enum htb_cmode new_mode = htb_class_mode(cl, diff);
1da177e4
LT
524
525 if (new_mode == cl->cmode)
87990467
SH
526 return;
527
528 if (cl->prio_activity) { /* not necessary: speed optimization */
529 if (cl->cmode != HTB_CANT_SEND)
530 htb_deactivate_prios(q, cl);
1da177e4 531 cl->cmode = new_mode;
87990467
SH
532 if (new_mode != HTB_CANT_SEND)
533 htb_activate_prios(q, cl);
534 } else
1da177e4
LT
535 cl->cmode = new_mode;
536}
537
538/**
10297b99 539 * htb_activate - inserts leaf cl into appropriate active feeds
1da177e4
LT
540 *
541 * Routine learns (new) priority of leaf and activates feed chain
542 * for the prio. It can be called on already active leaf safely.
543 * It also adds leaf into droplist.
544 */
87990467 545static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
1da177e4 546{
547b792c 547 WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
3bf72957 548
1da177e4 549 if (!cl->prio_activity) {
c19f7a34 550 cl->prio_activity = 1 << cl->prio;
87990467
SH
551 htb_activate_prios(q, cl);
552 list_add_tail(&cl->un.leaf.drop_list,
c19f7a34 553 q->drops + cl->prio);
1da177e4
LT
554 }
555}
556
557/**
10297b99 558 * htb_deactivate - remove leaf cl from active feeds
1da177e4
LT
559 *
560 * Make sure that leaf is active. In the other words it can't be called
561 * with non-active leaf. It also removes class from the drop list.
562 */
87990467 563static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
1da177e4 564{
547b792c 565 WARN_ON(!cl->prio_activity);
3bf72957 566
87990467 567 htb_deactivate_prios(q, cl);
1da177e4
LT
568 cl->prio_activity = 0;
569 list_del_init(&cl->un.leaf.drop_list);
570}
571
572static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
573{
f30ab418 574 int uninitialized_var(ret);
87990467
SH
575 struct htb_sched *q = qdisc_priv(sch);
576 struct htb_class *cl = htb_classify(skb, sch, &ret);
577
578 if (cl == HTB_DIRECT) {
579 /* enqueue to helper queue */
580 if (q->direct_queue.qlen < q->direct_qlen) {
581 __skb_queue_tail(&q->direct_queue, skb);
582 q->direct_pkts++;
583 } else {
17045755 584 return qdisc_drop(skb, sch);
87990467 585 }
1da177e4 586#ifdef CONFIG_NET_CLS_ACT
87990467 587 } else if (!cl) {
c27f339a 588 if (ret & __NET_XMIT_BYPASS)
87990467
SH
589 sch->qstats.drops++;
590 kfree_skb(skb);
591 return ret;
1da177e4 592#endif
378a2f09
JP
593 } else if ((ret = qdisc_enqueue(skb, cl->un.leaf.q)) != NET_XMIT_SUCCESS) {
594 if (net_xmit_drop_count(ret)) {
595 sch->qstats.drops++;
596 cl->qstats.drops++;
597 }
69747650 598 return ret;
87990467 599 } else {
87990467
SH
600 htb_activate(q, cl);
601 }
602
603 sch->q.qlen++;
87990467 604 return NET_XMIT_SUCCESS;
1da177e4
LT
605}
606
56b765b7 607static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
59e4220a 608{
56b765b7 609 s64 toks = diff + cl->tokens;
59e4220a
JP
610
611 if (toks > cl->buffer)
612 toks = cl->buffer;
292f1c7f 613 toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
59e4220a
JP
614 if (toks <= -cl->mbuffer)
615 toks = 1 - cl->mbuffer;
616
617 cl->tokens = toks;
618}
619
56b765b7 620static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
59e4220a 621{
56b765b7 622 s64 toks = diff + cl->ctokens;
59e4220a
JP
623
624 if (toks > cl->cbuffer)
625 toks = cl->cbuffer;
292f1c7f 626 toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
59e4220a
JP
627 if (toks <= -cl->mbuffer)
628 toks = 1 - cl->mbuffer;
629
630 cl->ctokens = toks;
631}
632
1da177e4
LT
633/**
634 * htb_charge_class - charges amount "bytes" to leaf and ancestors
635 *
636 * Routine assumes that packet "bytes" long was dequeued from leaf cl
637 * borrowing from "level". It accounts bytes to ceil leaky bucket for
638 * leaf and all ancestors and to rate bucket for ancestors at levels
639 * "level" and higher. It also handles possible change of mode resulting
640 * from the update. Note that mode can also increase here (MAY_BORROW to
641 * CAN_SEND) because we can use more precise clock that event queue here.
642 * In such case we remove class from event queue first.
643 */
87990467 644static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
c9726d68 645 int level, struct sk_buff *skb)
87990467 646{
0abf77e5 647 int bytes = qdisc_pkt_len(skb);
1da177e4 648 enum htb_cmode old_mode;
56b765b7 649 s64 diff;
1da177e4
LT
650
651 while (cl) {
56b765b7 652 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
1da177e4 653 if (cl->level >= level) {
87990467
SH
654 if (cl->level == level)
655 cl->xstats.lends++;
59e4220a 656 htb_accnt_tokens(cl, bytes, diff);
1da177e4
LT
657 } else {
658 cl->xstats.borrows++;
87990467 659 cl->tokens += diff; /* we moved t_c; update tokens */
1da177e4 660 }
59e4220a 661 htb_accnt_ctokens(cl, bytes, diff);
1da177e4 662 cl->t_c = q->now;
1da177e4 663
87990467
SH
664 old_mode = cl->cmode;
665 diff = 0;
666 htb_change_class_mode(q, cl, &diff);
1da177e4
LT
667 if (old_mode != cl->cmode) {
668 if (old_mode != HTB_CAN_SEND)
c9364636 669 htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
1da177e4 670 if (cl->cmode != HTB_CAN_SEND)
87990467 671 htb_add_to_wait_tree(q, cl, diff);
1da177e4 672 }
1da177e4 673
bfe0d029
ED
674 /* update basic stats except for leaves which are already updated */
675 if (cl->level)
676 bstats_update(&cl->bstats, skb);
677
1da177e4
LT
678 cl = cl->parent;
679 }
680}
681
682/**
683 * htb_do_events - make mode changes to classes at the level
684 *
fb983d45 685 * Scans event queue for pending events and applies them. Returns time of
1224736d 686 * next pending event (0 for no event in pq, q->now for too many events).
fb983d45 687 * Note: Applied are events whose have cl->pq_key <= q->now.
1da177e4 688 */
c9364636 689static s64 htb_do_events(struct htb_sched *q, const int level,
5343a7f8 690 unsigned long start)
1da177e4 691{
8f3ea33a 692 /* don't run for longer than 2 jiffies; 2 is used instead of
cc7ec456
ED
693 * 1 to simplify things when jiffy is going to be incremented
694 * too soon
695 */
a73be040 696 unsigned long stop_at = start + 2;
c9364636
ED
697 struct rb_root *wait_pq = &q->hlevel[level].wait_pq;
698
8f3ea33a 699 while (time_before(jiffies, stop_at)) {
1da177e4 700 struct htb_class *cl;
56b765b7 701 s64 diff;
c9364636 702 struct rb_node *p = rb_first(wait_pq);
30bdbe39 703
87990467
SH
704 if (!p)
705 return 0;
1da177e4
LT
706
707 cl = rb_entry(p, struct htb_class, pq_node);
fb983d45
PM
708 if (cl->pq_key > q->now)
709 return cl->pq_key;
710
c9364636 711 htb_safe_rb_erase(p, wait_pq);
56b765b7 712 diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
87990467 713 htb_change_class_mode(q, cl, &diff);
1da177e4 714 if (cl->cmode != HTB_CAN_SEND)
87990467 715 htb_add_to_wait_tree(q, cl, diff);
1da177e4 716 }
1224736d
JP
717
718 /* too much load - let's continue after a break for scheduling */
e82181de 719 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
c17988a9 720 pr_warn("htb: too many events!\n");
e82181de
JP
721 q->warned |= HTB_WARN_TOOMANYEVENTS;
722 }
1224736d
JP
723
724 return q->now;
1da177e4
LT
725}
726
727/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
cc7ec456
ED
728 * is no such one exists.
729 */
87990467
SH
730static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
731 u32 id)
1da177e4
LT
732{
733 struct rb_node *r = NULL;
734 while (n) {
87990467
SH
735 struct htb_class *cl =
736 rb_entry(n, struct htb_class, node[prio]);
87990467 737
f4c1f3e0 738 if (id > cl->common.classid) {
1da177e4 739 n = n->rb_right;
1b5c0077 740 } else if (id < cl->common.classid) {
1da177e4
LT
741 r = n;
742 n = n->rb_left;
1b5c0077
JP
743 } else {
744 return n;
1da177e4
LT
745 }
746 }
747 return r;
748}
749
750/**
751 * htb_lookup_leaf - returns next leaf class in DRR order
752 *
753 * Find leaf where current feed pointers points to.
754 */
c9364636 755static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
1da177e4
LT
756{
757 int i;
758 struct {
759 struct rb_node *root;
760 struct rb_node **pptr;
761 u32 *pid;
87990467
SH
762 } stk[TC_HTB_MAXDEPTH], *sp = stk;
763
c9364636
ED
764 BUG_ON(!hprio->row.rb_node);
765 sp->root = hprio->row.rb_node;
766 sp->pptr = &hprio->ptr;
767 sp->pid = &hprio->last_ptr_id;
1da177e4
LT
768
769 for (i = 0; i < 65535; i++) {
87990467 770 if (!*sp->pptr && *sp->pid) {
10297b99 771 /* ptr was invalidated but id is valid - try to recover
cc7ec456
ED
772 * the original or next ptr
773 */
87990467
SH
774 *sp->pptr =
775 htb_id_find_next_upper(prio, sp->root, *sp->pid);
1da177e4 776 }
87990467 777 *sp->pid = 0; /* ptr is valid now so that remove this hint as it
cc7ec456
ED
778 * can become out of date quickly
779 */
87990467 780 if (!*sp->pptr) { /* we are at right end; rewind & go up */
1da177e4 781 *sp->pptr = sp->root;
87990467 782 while ((*sp->pptr)->rb_left)
1da177e4
LT
783 *sp->pptr = (*sp->pptr)->rb_left;
784 if (sp > stk) {
785 sp--;
512bb43e
JP
786 if (!*sp->pptr) {
787 WARN_ON(1);
87990467 788 return NULL;
512bb43e 789 }
87990467 790 htb_next_rb_node(sp->pptr);
1da177e4
LT
791 }
792 } else {
793 struct htb_class *cl;
c9364636
ED
794 struct htb_prio *clp;
795
87990467
SH
796 cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
797 if (!cl->level)
1da177e4 798 return cl;
c9364636
ED
799 clp = &cl->un.inner.clprio[prio];
800 (++sp)->root = clp->feed.rb_node;
801 sp->pptr = &clp->ptr;
802 sp->pid = &clp->last_ptr_id;
1da177e4
LT
803 }
804 }
547b792c 805 WARN_ON(1);
1da177e4
LT
806 return NULL;
807}
808
809/* dequeues packet at given priority and level; call only if
cc7ec456
ED
810 * you are sure that there is active class at prio/level
811 */
c9364636
ED
812static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
813 const int level)
1da177e4
LT
814{
815 struct sk_buff *skb = NULL;
87990467 816 struct htb_class *cl, *start;
c9364636
ED
817 struct htb_level *hlevel = &q->hlevel[level];
818 struct htb_prio *hprio = &hlevel->hprio[prio];
819
1da177e4 820 /* look initial class up in the row */
c9364636 821 start = cl = htb_lookup_leaf(hprio, prio);
87990467 822
1da177e4
LT
823 do {
824next:
512bb43e 825 if (unlikely(!cl))
87990467 826 return NULL;
1da177e4
LT
827
828 /* class can be empty - it is unlikely but can be true if leaf
cc7ec456
ED
829 * qdisc drops packets in enqueue routine or if someone used
830 * graft operation on the leaf since last dequeue;
831 * simply deactivate and skip such class
832 */
1da177e4
LT
833 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
834 struct htb_class *next;
87990467 835 htb_deactivate(q, cl);
1da177e4
LT
836
837 /* row/level might become empty */
838 if ((q->row_mask[level] & (1 << prio)) == 0)
87990467 839 return NULL;
1da177e4 840
c9364636 841 next = htb_lookup_leaf(hprio, prio);
87990467
SH
842
843 if (cl == start) /* fix start if we just deleted it */
1da177e4
LT
844 start = next;
845 cl = next;
846 goto next;
847 }
87990467
SH
848
849 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
850 if (likely(skb != NULL))
1da177e4 851 break;
633fe66e 852
b00355db 853 qdisc_warn_nonwc("htb", cl->un.leaf.q);
c9364636
ED
854 htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr:
855 &q->hlevel[0].hprio[prio].ptr);
856 cl = htb_lookup_leaf(hprio, prio);
1da177e4
LT
857
858 } while (cl != start);
859
860 if (likely(skb != NULL)) {
196d97f6 861 bstats_update(&cl->bstats, skb);
0abf77e5
JK
862 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
863 if (cl->un.leaf.deficit[level] < 0) {
c19f7a34 864 cl->un.leaf.deficit[level] += cl->quantum;
c9364636
ED
865 htb_next_rb_node(level ? &cl->parent->un.inner.clprio[prio].ptr :
866 &q->hlevel[0].hprio[prio].ptr);
1da177e4
LT
867 }
868 /* this used to be after charge_class but this constelation
cc7ec456
ED
869 * gives us slightly better performance
870 */
1da177e4 871 if (!cl->un.leaf.q->q.qlen)
87990467 872 htb_deactivate(q, cl);
c9726d68 873 htb_charge_class(q, cl, level, skb);
1da177e4
LT
874 }
875 return skb;
876}
877
1da177e4
LT
/*
 * htb_dequeue - hand the next packet of this HTB qdisc to the caller.
 *
 * Packets on q->direct_queue are served first.  Otherwise every level
 * (0 .. TC_HTB_MAXDEPTH - 1) is visited; pending events for the level are
 * processed via htb_do_events() when its cached event time has been
 * reached, and then each priority whose bit is set in q->row_mask[level]
 * is tried through htb_dequeue_tree().
 *
 * Returns the dequeued skb, or NULL when nothing can be sent right now.
 * In the latter case (with packets still queued) the overlimit counter is
 * bumped and either the watchdog hrtimer is armed for the earliest known
 * event or q->work is scheduled.
 */
878static struct sk_buff *htb_dequeue(struct Qdisc *sch)
879{
9190b3b3 880 struct sk_buff *skb;
1da177e4
LT
881 struct htb_sched *q = qdisc_priv(sch);
882 int level;
5343a7f8 883 s64 next_event;
a73be040 884 unsigned long start_at;
1da177e4
LT
885
886 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
87990467
SH
887 skb = __skb_dequeue(&q->direct_queue);
888 if (skb != NULL) {
/* "ok" is also the landing point for a successful class dequeue below */
9190b3b3
ED
889ok:
890 qdisc_bstats_update(sch, skb);
fd245a4a 891 qdisc_unthrottled(sch);
1da177e4
LT
892 sch->q.qlen--;
893 return skb;
894 }
895
/* nothing queued at all: skb is still NULL here, so "fin" returns NULL */
87990467
SH
896 if (!sch->q.qlen)
897 goto fin;
56b765b7 898 q->now = ktime_to_ns(ktime_get());
a73be040 899 start_at = jiffies;
1da177e4 900
/* upper bound for the wake-up time: 5 seconds from now */
d2fe85da 901 next_event = q->now + 5LLU * NSEC_PER_SEC;
633fe66e 902
1da177e4
LT
903 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
904 /* common case optimization - skip event handler quickly */
905 int m;
c9364636 906 s64 event = q->near_ev_cache[level];
fb983d45 907
c9364636 908 if (q->now >= event) {
a73be040 909 event = htb_do_events(q, level, start_at);
/* a zero result is replaced by "one second from now" */
2e4b3b0e 910 if (!event)
56b765b7 911 event = q->now + NSEC_PER_SEC;
2e4b3b0e 912 q->near_ev_cache[level] = event;
c9364636 913 }
fb983d45 914
/* keep the earliest event seen over all levels */
c0851347 915 if (next_event > event)
fb983d45 916 next_event = event;
87990467 917
/*
 * m has a zero bit for every priority set in row_mask[level]; each pass
 * picks one such bit (ffz presumably = find first zero), marks it done by
 * setting it, and tries that priority.  The loop ends when m is all ones.
 */
1da177e4
LT
918 m = ~q->row_mask[level];
919 while (m != (int)(-1)) {
87990467 920 int prio = ffz(m);
cc7ec456 921
1da177e4 922 m |= 1 << prio;
87990467 923 skb = htb_dequeue_tree(q, prio, level);
9190b3b3
ED
924 if (likely(skb != NULL))
925 goto ok;
1da177e4
LT
926 }
927 }
/* packets are queued but no level/priority produced one */
fb983d45 928 sch->qstats.overlimits++;
56b765b7
V
929 if (likely(next_event > q->now)) {
/* arm the watchdog at the absolute time next_event, unless the root qdisc is flagged deactivated */
930 if (!test_bit(__QDISC_STATE_DEACTIVATED,
931 &qdisc_root_sleeping(q->watchdog.qdisc)->state)) {
932 ktime_t time = ns_to_ktime(next_event);
933 qdisc_throttled(q->watchdog.qdisc);
934 hrtimer_start(&q->watchdog.timer, time,
935 HRTIMER_MODE_ABS);
936 }
937 } else {
/* the next event is not in the future: hand over to the work item instead of a timer */
1224736d 938 schedule_work(&q->work);
56b765b7 939 }
1da177e4 940fin:
1da177e4
LT
941 return skb;
942}
943
944/* try to drop from each class (by prio) until one succeed */
87990467 945static unsigned int htb_drop(struct Qdisc *sch)
1da177e4
LT
946{
947 struct htb_sched *q = qdisc_priv(sch);
948 int prio;
949
950 for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) {
951 struct list_head *p;
87990467 952 list_for_each(p, q->drops + prio) {
1da177e4
LT
953 struct htb_class *cl = list_entry(p, struct htb_class,
954 un.leaf.drop_list);
955 unsigned int len;
87990467
SH
956 if (cl->un.leaf.q->ops->drop &&
957 (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
1da177e4
LT
958 sch->q.qlen--;
959 if (!cl->un.leaf.q->q.qlen)
87990467 960 htb_deactivate(q, cl);
1da177e4
LT
961 return len;
962 }
963 }
964 }
965 return 0;
966}
967
968/* reset all classes */
969/* always caled under BH & queue lock */
87990467 970static void htb_reset(struct Qdisc *sch)
1da177e4
LT
971{
972 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 973 struct htb_class *cl;
f4c1f3e0 974 unsigned int i;
0cef296d 975
f4c1f3e0 976 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 977 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1da177e4 978 if (cl->level)
87990467 979 memset(&cl->un.inner, 0, sizeof(cl->un.inner));
1da177e4 980 else {
87990467 981 if (cl->un.leaf.q)
1da177e4
LT
982 qdisc_reset(cl->un.leaf.q);
983 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
984 }
985 cl->prio_activity = 0;
986 cl->cmode = HTB_CAN_SEND;
1da177e4
LT
987
988 }
989 }
fb983d45 990 qdisc_watchdog_cancel(&q->watchdog);
1da177e4
LT
991 __skb_queue_purge(&q->direct_queue);
992 sch->q.qlen = 0;
c9364636 993 memset(q->hlevel, 0, sizeof(q->hlevel));
87990467 994 memset(q->row_mask, 0, sizeof(q->row_mask));
1da177e4 995 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 996 INIT_LIST_HEAD(q->drops + i);
1da177e4
LT
997}
998
27a3421e
PM
/*
 * Netlink attribute policy for the nested HTB options.  It is handed to
 * nla_parse_nested() by both htb_init() and htb_change_class().
 *
 * PARMS and INIT carry whole structures (only a length is given), CTAB and
 * RTAB are binary blobs bounded by TC_RTAB_SIZE, and the remaining
 * attributes are plain 32-bit / 64-bit integers.
 */
999static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
1000 [TCA_HTB_PARMS] = { .len = sizeof(struct tc_htb_opt) },
1001 [TCA_HTB_INIT] = { .len = sizeof(struct tc_htb_glob) },
1002 [TCA_HTB_CTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1003 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
6906f4ed 1004 [TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
df62cdf3
ED
1005 [TCA_HTB_RATE64] = { .type = NLA_U64 },
1006 [TCA_HTB_CEIL64] = { .type = NLA_U64 },
27a3421e
PM
1007};
1008
1224736d
JP
1009static void htb_work_func(struct work_struct *work)
1010{
1011 struct htb_sched *q = container_of(work, struct htb_sched, work);
1012 struct Qdisc *sch = q->watchdog.qdisc;
1013
1014 __netif_schedule(qdisc_root(sch));
1015}
1016
1e90474c 1017static int htb_init(struct Qdisc *sch, struct nlattr *opt)
1da177e4
LT
1018{
1019 struct htb_sched *q = qdisc_priv(sch);
6906f4ed 1020 struct nlattr *tb[TCA_HTB_MAX + 1];
1da177e4 1021 struct tc_htb_glob *gopt;
cee63723 1022 int err;
1da177e4 1023 int i;
cee63723
PM
1024
1025 if (!opt)
1026 return -EINVAL;
1027
6906f4ed 1028 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
cee63723
PM
1029 if (err < 0)
1030 return err;
1031
6906f4ed 1032 if (!tb[TCA_HTB_INIT])
1da177e4 1033 return -EINVAL;
6906f4ed 1034
1e90474c 1035 gopt = nla_data(tb[TCA_HTB_INIT]);
6906f4ed 1036 if (gopt->version != HTB_VER >> 16)
1da177e4 1037 return -EINVAL;
1da177e4 1038
f4c1f3e0
PM
1039 err = qdisc_class_hash_init(&q->clhash);
1040 if (err < 0)
1041 return err;
1da177e4 1042 for (i = 0; i < TC_HTB_NUMPRIO; i++)
87990467 1043 INIT_LIST_HEAD(q->drops + i);
1da177e4 1044
fb983d45 1045 qdisc_watchdog_init(&q->watchdog, sch);
1224736d 1046 INIT_WORK(&q->work, htb_work_func);
1da177e4
LT
1047 skb_queue_head_init(&q->direct_queue);
1048
6906f4ed
ED
1049 if (tb[TCA_HTB_DIRECT_QLEN])
1050 q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
1051 else {
1052 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
1053 if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
1054 q->direct_qlen = 2;
1055 }
1da177e4
LT
1056 if ((q->rate2quantum = gopt->rate2quantum) < 1)
1057 q->rate2quantum = 1;
1058 q->defcls = gopt->defcls;
1059
1060 return 0;
1061}
1062
1063static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
1064{
1065 struct htb_sched *q = qdisc_priv(sch);
4b3550ef 1066 struct nlattr *nest;
1da177e4 1067 struct tc_htb_glob gopt;
4b3550ef 1068
6f542efc
ED
1069 /* Its safe to not acquire qdisc lock. As we hold RTNL,
1070 * no change can happen on the qdisc parameters.
1071 */
1da177e4 1072
4b3550ef 1073 gopt.direct_pkts = q->direct_pkts;
1da177e4
LT
1074 gopt.version = HTB_VER;
1075 gopt.rate2quantum = q->rate2quantum;
1076 gopt.defcls = q->defcls;
3bf72957 1077 gopt.debug = 0;
4b3550ef
PM
1078
1079 nest = nla_nest_start(skb, TCA_OPTIONS);
1080 if (nest == NULL)
1081 goto nla_put_failure;
6906f4ed
ED
1082 if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
1083 nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
1b34ec43 1084 goto nla_put_failure;
4b3550ef 1085
6f542efc 1086 return nla_nest_end(skb, nest);
4b3550ef 1087
1e90474c 1088nla_put_failure:
4b3550ef 1089 nla_nest_cancel(skb, nest);
1da177e4
LT
1090 return -1;
1091}
1092
1093static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
87990467 1094 struct sk_buff *skb, struct tcmsg *tcm)
1da177e4 1095{
87990467 1096 struct htb_class *cl = (struct htb_class *)arg;
4b3550ef 1097 struct nlattr *nest;
1da177e4
LT
1098 struct tc_htb_opt opt;
1099
6f542efc
ED
1100 /* Its safe to not acquire qdisc lock. As we hold RTNL,
1101 * no change can happen on the class parameters.
1102 */
f4c1f3e0
PM
1103 tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
1104 tcm->tcm_handle = cl->common.classid;
1da177e4
LT
1105 if (!cl->level && cl->un.leaf.q)
1106 tcm->tcm_info = cl->un.leaf.q->handle;
1107
4b3550ef
PM
1108 nest = nla_nest_start(skb, TCA_OPTIONS);
1109 if (nest == NULL)
1110 goto nla_put_failure;
1da177e4 1111
87990467 1112 memset(&opt, 0, sizeof(opt));
1da177e4 1113
01cb71d2 1114 psched_ratecfg_getrate(&opt.rate, &cl->rate);
9c10f411 1115 opt.buffer = PSCHED_NS2TICKS(cl->buffer);
01cb71d2 1116 psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
9c10f411 1117 opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
c19f7a34
JP
1118 opt.quantum = cl->quantum;
1119 opt.prio = cl->prio;
87990467 1120 opt.level = cl->level;
1b34ec43
DM
1121 if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
1122 goto nla_put_failure;
df62cdf3
ED
1123 if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
1124 nla_put_u64(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps))
1125 goto nla_put_failure;
1126 if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
1127 nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps))
1128 goto nla_put_failure;
4b3550ef 1129
6f542efc 1130 return nla_nest_end(skb, nest);
4b3550ef 1131
1e90474c 1132nla_put_failure:
4b3550ef 1133 nla_nest_cancel(skb, nest);
1da177e4
LT
1134 return -1;
1135}
1136
1137static int
87990467 1138htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
1da177e4 1139{
87990467 1140 struct htb_class *cl = (struct htb_class *)arg;
1da177e4 1141
1da177e4
LT
1142 if (!cl->level && cl->un.leaf.q)
1143 cl->qstats.qlen = cl->un.leaf.q->q.qlen;
5343a7f8
ED
1144 cl->xstats.tokens = PSCHED_NS2TICKS(cl->tokens);
1145 cl->xstats.ctokens = PSCHED_NS2TICKS(cl->ctokens);
1da177e4
LT
1146
1147 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
d250a5f9 1148 gnet_stats_copy_rate_est(d, NULL, &cl->rate_est) < 0 ||
1da177e4
LT
1149 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1150 return -1;
1151
1152 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1153}
1154
1155static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
87990467 1156 struct Qdisc **old)
1da177e4 1157{
87990467 1158 struct htb_class *cl = (struct htb_class *)arg;
1da177e4 1159
5b9a9ccf
PM
1160 if (cl->level)
1161 return -EINVAL;
1162 if (new == NULL &&
3511c913 1163 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
5b9a9ccf
PM
1164 cl->common.classid)) == NULL)
1165 return -ENOBUFS;
1166
1167 sch_tree_lock(sch);
1168 *old = cl->un.leaf.q;
1169 cl->un.leaf.q = new;
1170 if (*old != NULL) {
1171 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1172 qdisc_reset(*old);
1da177e4 1173 }
5b9a9ccf
PM
1174 sch_tree_unlock(sch);
1175 return 0;
1da177e4
LT
1176}
1177
87990467 1178static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
1da177e4 1179{
87990467 1180 struct htb_class *cl = (struct htb_class *)arg;
5b9a9ccf 1181 return !cl->level ? cl->un.leaf.q : NULL;
1da177e4
LT
1182}
1183
256d61b8
PM
1184static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
1185{
1186 struct htb_class *cl = (struct htb_class *)arg;
1187
1188 if (cl->un.leaf.q->q.qlen == 0)
1189 htb_deactivate(qdisc_priv(sch), cl);
1190}
1191
1da177e4
LT
1192static unsigned long htb_get(struct Qdisc *sch, u32 classid)
1193{
87990467
SH
1194 struct htb_class *cl = htb_find(classid, sch);
1195 if (cl)
1da177e4
LT
1196 cl->refcnt++;
1197 return (unsigned long)cl;
1198}
1199
160d5e10
JP
1200static inline int htb_parent_last_child(struct htb_class *cl)
1201{
1202 if (!cl->parent)
1203 /* the root class */
1204 return 0;
42077599 1205 if (cl->parent->children > 1)
160d5e10
JP
1206 /* not the last child */
1207 return 0;
160d5e10
JP
1208 return 1;
1209}
1210
3ba08b00
JP
1211static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1212 struct Qdisc *new_q)
160d5e10
JP
1213{
1214 struct htb_class *parent = cl->parent;
1215
547b792c 1216 WARN_ON(cl->level || !cl->un.leaf.q || cl->prio_activity);
160d5e10 1217
3ba08b00 1218 if (parent->cmode != HTB_CAN_SEND)
c9364636
ED
1219 htb_safe_rb_erase(&parent->pq_node,
1220 &q->hlevel[parent->level].wait_pq);
3ba08b00 1221
160d5e10
JP
1222 parent->level = 0;
1223 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1224 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1225 parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
160d5e10
JP
1226 parent->tokens = parent->buffer;
1227 parent->ctokens = parent->cbuffer;
5343a7f8 1228 parent->t_c = ktime_to_ns(ktime_get());
160d5e10
JP
1229 parent->cmode = HTB_CAN_SEND;
1230}
1231
87990467 1232static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1da177e4 1233{
1da177e4 1234 if (!cl->level) {
547b792c 1235 WARN_ON(!cl->un.leaf.q);
1da177e4
LT
1236 qdisc_destroy(cl->un.leaf.q);
1237 }
ee39e10c 1238 gen_kill_estimator(&cl->bstats, &cl->rate_est);
ff31ab56 1239 tcf_destroy_chain(&cl->filter_list);
1da177e4
LT
1240 kfree(cl);
1241}
1242
87990467 1243static void htb_destroy(struct Qdisc *sch)
1da177e4
LT
1244{
1245 struct htb_sched *q = qdisc_priv(sch);
b67bfe0d 1246 struct hlist_node *next;
fbd8f137
PM
1247 struct htb_class *cl;
1248 unsigned int i;
1da177e4 1249
1224736d 1250 cancel_work_sync(&q->work);
fb983d45 1251 qdisc_watchdog_cancel(&q->watchdog);
1da177e4 1252 /* This line used to be after htb_destroy_class call below
cc7ec456
ED
1253 * and surprisingly it worked in 2.4. But it must precede it
1254 * because filter need its target class alive to be able to call
1255 * unbind_filter on it (without Oops).
1256 */
ff31ab56 1257 tcf_destroy_chain(&q->filter_list);
87990467 1258
f4c1f3e0 1259 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 1260 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
fbd8f137
PM
1261 tcf_destroy_chain(&cl->filter_list);
1262 }
f4c1f3e0 1263 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 1264 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
f4c1f3e0 1265 common.hnode)
fbd8f137
PM
1266 htb_destroy_class(sch, cl);
1267 }
f4c1f3e0 1268 qdisc_class_hash_destroy(&q->clhash);
1da177e4
LT
1269 __skb_queue_purge(&q->direct_queue);
1270}
1271
/*
 * htb_delete - unlink a class from the scheduler.
 *
 * Refuses with -EBUSY while the class still has children or bound filters.
 * Otherwise the class is removed from the class hash, deactivated, taken
 * off its wait queue and, when it was the last child of its parent, the
 * parent is converted back into a leaf.  The class memory itself is not
 * freed here: one reference is dropped and the final htb_put() is expected
 * to destroy the class.
 *
 * Returns 0 on success.
 */
1272static int htb_delete(struct Qdisc *sch, unsigned long arg)
1273{
1274 struct htb_sched *q = qdisc_priv(sch);
87990467 1275 struct htb_class *cl = (struct htb_class *)arg;
256d61b8 1276 unsigned int qlen;
160d5e10
JP
1277 struct Qdisc *new_q = NULL;
1278 int last_child = 0;
1da177e4 1279
a071d272
YY
1280 /* TODO: why don't allow to delete subtree ? references ? does
1281 * tc subsys guarantee us that in htb_destroy it holds no class
1282 * refs so that we can remove children safely there ?
1283 */
42077599 1284 if (cl->children || cl->filter_cnt)
1da177e4 1285 return -EBUSY;
87990467 1286
/*
 * The replacement qdisc for the parent is created before sch_tree_lock()
 * is taken; new_q may still be NULL, which htb_parent_to_leaf() handles
 * by falling back to noop_qdisc.
 */
160d5e10 1287 if (!cl->level && htb_parent_last_child(cl)) {
3511c913 1288 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
bb949fbd 1289 cl->parent->common.classid);
160d5e10
JP
1290 last_child = 1;
1291 }
1292
1da177e4 1293 sch_tree_lock(sch);
87990467 1294
/* leaf: flush its queue; qlen is sampled before the reset so the tree can be adjusted by that amount */
814a175e 1295 if (!cl->level) {
256d61b8 1296 qlen = cl->un.leaf.q->q.qlen;
814a175e 1297 qdisc_reset(cl->un.leaf.q);
256d61b8 1298 qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
814a175e
PM
1299 }
1300
f4c1f3e0
PM
1301 /* delete from hash and active; remainder in destroy_class */
1302 qdisc_class_hash_remove(&q->clhash, &cl->common);
26b284de
JP
1303 if (cl->parent)
1304 cl->parent->children--;
c38c83cb 1305
1da177e4 1306 if (cl->prio_activity)
87990467 1307 htb_deactivate(q, cl);
1da177e4 1308
/* a class that is not in HTB_CAN_SEND mode is removed from its level's wait queue */
fbd8f137 1309 if (cl->cmode != HTB_CAN_SEND)
c9364636
ED
1310 htb_safe_rb_erase(&cl->pq_node,
1311 &q->hlevel[cl->level].wait_pq);
fbd8f137 1312
160d5e10 1313 if (last_child)
3ba08b00 1314 htb_parent_to_leaf(q, cl, new_q);
160d5e10 1315
/* NOTE: the decrement is a side effect of the BUG_ON() condition */
7cd0a638
JP
1316 BUG_ON(--cl->refcnt == 0);
1317 /*
1318 * This shouldn't happen: we "hold" one cops->get() when called
1319 * from tc_ctl_tclass; the destroy method is done from cops->put().
1320 */
1da177e4
LT
1321
1322 sch_tree_unlock(sch);
1323 return 0;
1324}
1325
1326static void htb_put(struct Qdisc *sch, unsigned long arg)
1327{
87990467 1328 struct htb_class *cl = (struct htb_class *)arg;
1da177e4
LT
1329
1330 if (--cl->refcnt == 0)
87990467 1331 htb_destroy_class(sch, cl);
1da177e4
LT
1332}
1333
87990467 1334static int htb_change_class(struct Qdisc *sch, u32 classid,
1e90474c 1335 u32 parentid, struct nlattr **tca,
87990467 1336 unsigned long *arg)
1da177e4
LT
1337{
1338 int err = -EINVAL;
1339 struct htb_sched *q = qdisc_priv(sch);
87990467 1340 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1e90474c 1341 struct nlattr *opt = tca[TCA_OPTIONS];
6906f4ed 1342 struct nlattr *tb[TCA_HTB_MAX + 1];
1da177e4 1343 struct tc_htb_opt *hopt;
df62cdf3 1344 u64 rate64, ceil64;
1da177e4
LT
1345
1346 /* extract all subattrs from opt attr */
cee63723
PM
1347 if (!opt)
1348 goto failure;
1349
e18434c4 1350 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
cee63723
PM
1351 if (err < 0)
1352 goto failure;
1353
1354 err = -EINVAL;
27a3421e 1355 if (tb[TCA_HTB_PARMS] == NULL)
1da177e4 1356 goto failure;
1da177e4 1357
87990467
SH
1358 parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);
1359
1e90474c 1360 hopt = nla_data(tb[TCA_HTB_PARMS]);
196d97f6 1361 if (!hopt->rate.rate || !hopt->ceil.rate)
87990467 1362 goto failure;
1da177e4 1363
8a8e3d84 1364 /* Keeping backward compatible with rate_table based iproute2 tc */
6b1dd856
YY
1365 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
1366 qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]));
1367
1368 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
1369 qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]));
8a8e3d84 1370
87990467 1371 if (!cl) { /* new class */
1da177e4 1372 struct Qdisc *new_q;
3696f625 1373 int prio;
ee39e10c 1374 struct {
1e90474c 1375 struct nlattr nla;
ee39e10c
PM
1376 struct gnet_estimator opt;
1377 } est = {
1e90474c
PM
1378 .nla = {
1379 .nla_len = nla_attr_size(sizeof(est.opt)),
1380 .nla_type = TCA_RATE,
ee39e10c
PM
1381 },
1382 .opt = {
1383 /* 4s interval, 16s averaging constant */
1384 .interval = 2,
1385 .ewma_log = 2,
1386 },
1387 };
3696f625 1388
1da177e4 1389 /* check for valid classid */
f64f9e71
JP
1390 if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
1391 htb_find(classid, sch))
1da177e4
LT
1392 goto failure;
1393
1394 /* check maximal depth */
1395 if (parent && parent->parent && parent->parent->level < 2) {
cc7ec456 1396 pr_err("htb: tree is too deep\n");
1da177e4
LT
1397 goto failure;
1398 }
1399 err = -ENOBUFS;
cc7ec456
ED
1400 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1401 if (!cl)
1da177e4 1402 goto failure;
87990467 1403
64153ce0
ED
1404 if (htb_rate_est || tca[TCA_RATE]) {
1405 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1406 qdisc_root_sleeping_lock(sch),
1407 tca[TCA_RATE] ? : &est.nla);
1408 if (err) {
1409 kfree(cl);
1410 goto failure;
1411 }
71bcb09a
SH
1412 }
1413
1da177e4 1414 cl->refcnt = 1;
42077599 1415 cl->children = 0;
1da177e4 1416 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
3696f625
SH
1417 RB_CLEAR_NODE(&cl->pq_node);
1418
1419 for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
1420 RB_CLEAR_NODE(&cl->node[prio]);
1da177e4
LT
1421
1422 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
cc7ec456
ED
1423 * so that can't be used inside of sch_tree_lock
1424 * -- thanks to Karlis Peisenieks
1425 */
3511c913 1426 new_q = qdisc_create_dflt(sch->dev_queue,
bb949fbd 1427 &pfifo_qdisc_ops, classid);
1da177e4
LT
1428 sch_tree_lock(sch);
1429 if (parent && !parent->level) {
256d61b8
PM
1430 unsigned int qlen = parent->un.leaf.q->q.qlen;
1431
1da177e4 1432 /* turn parent into inner node */
256d61b8
PM
1433 qdisc_reset(parent->un.leaf.q);
1434 qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
87990467
SH
1435 qdisc_destroy(parent->un.leaf.q);
1436 if (parent->prio_activity)
1437 htb_deactivate(q, parent);
1da177e4
LT
1438
1439 /* remove from evt list because of level change */
1440 if (parent->cmode != HTB_CAN_SEND) {
c9364636 1441 htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
1da177e4
LT
1442 parent->cmode = HTB_CAN_SEND;
1443 }
1444 parent->level = (parent->parent ? parent->parent->level
87990467
SH
1445 : TC_HTB_MAXDEPTH) - 1;
1446 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1da177e4
LT
1447 }
1448 /* leaf (we) needs elementary qdisc */
1449 cl->un.leaf.q = new_q ? new_q : &noop_qdisc;
1450
f4c1f3e0 1451 cl->common.classid = classid;
87990467 1452 cl->parent = parent;
1da177e4
LT
1453
1454 /* set class to be in HTB_CAN_SEND state */
b9a7afde
JP
1455 cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
1456 cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
5343a7f8
ED
1457 cl->mbuffer = 60ULL * NSEC_PER_SEC; /* 1min */
1458 cl->t_c = ktime_to_ns(ktime_get());
1da177e4
LT
1459 cl->cmode = HTB_CAN_SEND;
1460
1461 /* attach to the hash list and parent's family */
f4c1f3e0 1462 qdisc_class_hash_insert(&q->clhash, &cl->common);
42077599
PM
1463 if (parent)
1464 parent->children++;
ee39e10c 1465 } else {
71bcb09a
SH
1466 if (tca[TCA_RATE]) {
1467 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1468 qdisc_root_sleeping_lock(sch),
1469 tca[TCA_RATE]);
1470 if (err)
1471 return err;
1472 }
87990467 1473 sch_tree_lock(sch);
ee39e10c 1474 }
1da177e4 1475
1598f7cb
YY
1476 rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;
1477
1478 ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;
1479
1480 psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
1481 psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);
1482
1da177e4 1483 /* it used to be a nasty bug here, we have to check that node
cc7ec456
ED
1484 * is really leaf before changing cl->un.leaf !
1485 */
1da177e4 1486 if (!cl->level) {
1598f7cb
YY
1487 u64 quantum = cl->rate.rate_bytes_ps;
1488
1489 do_div(quantum, q->rate2quantum);
1490 cl->quantum = min_t(u64, quantum, INT_MAX);
1491
c19f7a34 1492 if (!hopt->quantum && cl->quantum < 1000) {
c17988a9
YY
1493 pr_warn("HTB: quantum of class %X is small. Consider r2q change.\n",
1494 cl->common.classid);
c19f7a34 1495 cl->quantum = 1000;
1da177e4 1496 }
c19f7a34 1497 if (!hopt->quantum && cl->quantum > 200000) {
c17988a9
YY
1498 pr_warn("HTB: quantum of class %X is big. Consider r2q change.\n",
1499 cl->common.classid);
c19f7a34 1500 cl->quantum = 200000;
1da177e4
LT
1501 }
1502 if (hopt->quantum)
c19f7a34
JP
1503 cl->quantum = hopt->quantum;
1504 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1505 cl->prio = TC_HTB_NUMPRIO - 1;
1da177e4
LT
1506 }
1507
324f5aa5 1508 cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
f3ad857e 1509 cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
56b765b7 1510
1da177e4
LT
1511 sch_tree_unlock(sch);
1512
f4c1f3e0
PM
1513 qdisc_class_hash_grow(sch, &q->clhash);
1514
1da177e4
LT
1515 *arg = (unsigned long)cl;
1516 return 0;
1517
1518failure:
1da177e4
LT
1519 return err;
1520}
1521
1522static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg)
1523{
1524 struct htb_sched *q = qdisc_priv(sch);
1525 struct htb_class *cl = (struct htb_class *)arg;
1526 struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list;
3bf72957 1527
1da177e4
LT
1528 return fl;
1529}
1530
1531static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
87990467 1532 u32 classid)
1da177e4 1533{
87990467 1534 struct htb_class *cl = htb_find(classid, sch);
3bf72957 1535
1da177e4 1536 /*if (cl && !cl->level) return 0;
cc7ec456
ED
1537 * The line above used to be there to prevent attaching filters to
1538 * leaves. But at least tc_index filter uses this just to get class
1539 * for other reasons so that we have to allow for it.
1540 * ----
1541 * 19.6.2002 As Werner explained it is ok - bind filter is just
1542 * another way to "lock" the class - unlike "get" this lock can
1543 * be broken by class during destroy IIUC.
1da177e4 1544 */
87990467
SH
1545 if (cl)
1546 cl->filter_cnt++;
1da177e4
LT
1547 return (unsigned long)cl;
1548}
1549
1550static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
1551{
1da177e4 1552 struct htb_class *cl = (struct htb_class *)arg;
3bf72957 1553
87990467
SH
1554 if (cl)
1555 cl->filter_cnt--;
1da177e4
LT
1556}
1557
1558static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1559{
1560 struct htb_sched *q = qdisc_priv(sch);
f4c1f3e0 1561 struct htb_class *cl;
f4c1f3e0 1562 unsigned int i;
1da177e4
LT
1563
1564 if (arg->stop)
1565 return;
1566
f4c1f3e0 1567 for (i = 0; i < q->clhash.hashsize; i++) {
b67bfe0d 1568 hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
1da177e4
LT
1569 if (arg->count < arg->skip) {
1570 arg->count++;
1571 continue;
1572 }
1573 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
1574 arg->stop = 1;
1575 return;
1576 }
1577 arg->count++;
1578 }
1579 }
1580}
1581
/*
 * Class-level operations of the HTB qdisc.  Referenced from
 * htb_qdisc_ops.cl_ops; every callback is a static function of this file.
 */
20fea08b 1582static const struct Qdisc_class_ops htb_class_ops = {
1da177e4
LT
1583 .graft = htb_graft,
1584 .leaf = htb_leaf,
256d61b8 1585 .qlen_notify = htb_qlen_notify,
1da177e4
LT
1586 .get = htb_get,
1587 .put = htb_put,
1588 .change = htb_change_class,
1589 .delete = htb_delete,
1590 .walk = htb_walk,
1591 .tcf_chain = htb_find_tcf,
1592 .bind_tcf = htb_bind_filter,
1593 .unbind_tcf = htb_unbind_filter,
1594 .dump = htb_dump_class,
1595 .dump_stats = htb_dump_class_stats,
1596};
1597
/*
 * Qdisc-level operations of HTB, registered under the id "htb" by
 * htb_module_init() and unregistered by htb_module_exit().
 * priv_size makes struct htb_sched the per-qdisc private area.
 */
20fea08b 1598static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1da177e4
LT
1599 .cl_ops = &htb_class_ops,
1600 .id = "htb",
1601 .priv_size = sizeof(struct htb_sched),
1602 .enqueue = htb_enqueue,
1603 .dequeue = htb_dequeue,
77be155c 1604 .peek = qdisc_peek_dequeued,
1da177e4
LT
1605 .drop = htb_drop,
1606 .init = htb_init,
1607 .reset = htb_reset,
1608 .destroy = htb_destroy,
1da177e4
LT
1609 .dump = htb_dump,
1610 .owner = THIS_MODULE,
1611};
1612
1613static int __init htb_module_init(void)
1614{
87990467 1615 return register_qdisc(&htb_qdisc_ops);
1da177e4 1616}
87990467 1617static void __exit htb_module_exit(void)
1da177e4 1618{
87990467 1619 unregister_qdisc(&htb_qdisc_ops);
1da177e4 1620}
87990467 1621
1da177e4
LT
1622module_init(htb_module_init)
1623module_exit(htb_module_exit)
1624MODULE_LICENSE("GPL");