// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_htb.c	Hierarchical token bucket, feed tree version
 *
 * Authors:	Martin Devera, <devik@cdi.cz>
 *
 * Credits (in time order) for older HTB versions:
 *		Stef Coene <stef.coene@docum.org>
 *		HTB support at LARTC mailing list
 *		Ondrej Kraus, <krauso@barr.cz>
 *		found missing INIT_QDISC(htb)
 *		Vladimir Smelhaus, Aamer Akhter, Bert Hubert
 *		helped a lot to locate nasty class stall bug
 *		Andi Kleen, Jamal Hadi, Bert Hubert
 *		code review and helpful comments on shaping
 *		Tomasz Wrona, <tw@eter.tym.pl>
 *		created test case so that I was able to fix nasty bug
 *		Wilfried Weissmann
 *		spotted bug in dequeue code and helped with fix
 *		Jiri Fojtasek
 *		fixed requeue routine
 *		and many others. thanks.
 */
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

/* HTB algorithm.
 Author: devik@cdi.cz
 ========================================================================
 HTB is like TBF with multiple classes. It is also similar to CBQ because
 it allows assigning a priority to each class in the hierarchy.
 In fact it is another implementation of Floyd's formal sharing.

 Levels:
 Each class is assigned a level. Leaves ALWAYS have level 0 and root
 classes have level TC_HTB_MAXDEPTH-1. Interior nodes have a level
 one less than their parent.
*/

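/* Worked example (illustrative; TC_HTB_MAXDEPTH is 8 in pkt_sched.h):
 * in the hierarchy 1:1 -> 1:10 -> 1:100, leaf 1:100 sits at level 0,
 * inner node 1:10 at level 6 and root 1:1 at level 7; see how
 * htb_change_class() below derives parent->level when a class gains a
 * child.
 */
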
static int htb_hysteresis __read_mostly = 0; /* whether to use mode hysteresis for speedup */
#define HTB_VER 0x30011		/* major must be matched with number supplied by TC as version */

#if HTB_VER >> 16 != TC_HTB_PROTOVER
#error "Mismatched sch_htb.c and pkt_sch.h"
#endif

/* Module parameter and sysfs export */
module_param(htb_hysteresis, int, 0640);
MODULE_PARM_DESC(htb_hysteresis, "Hysteresis mode, less CPU load, less accurate");

static int htb_rate_est = 0; /* htb classes have a default rate estimator */
module_param(htb_rate_est, int, 0640);
MODULE_PARM_DESC(htb_rate_est, "setup a default rate estimator (4sec 16sec) for htb classes");

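/* Usage sketch (illustrative; assumes HTB is built as the sch_htb
 * module - the 0640 mode above exposes both knobs via sysfs):
 *
 *	modprobe sch_htb htb_hysteresis=1 htb_rate_est=1
 *	echo 0 > /sys/module/sch_htb/parameters/htb_hysteresis
 */
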
/* used internally to keep status of a single class */
enum htb_cmode {
	HTB_CANT_SEND,		/* class can't send and can't borrow */
	HTB_MAY_BORROW,		/* class can't send but may borrow */
	HTB_CAN_SEND		/* class can send */
};

struct htb_prio {
	union {
		struct rb_root	row;
		struct rb_root	feed;
	};
	struct rb_node	*ptr;
	/* When class changes from state 1->2 and disconnects from
	 * parent's feed then we lost ptr value and start from the
	 * first child again. Here we store classid of the
	 * last valid ptr (used when ptr is NULL).
	 */
	u32		last_ptr_id;
};

/* interior & leaf nodes; props specific to leaves are marked L:
 * To reduce false sharing, place mostly read fields at beginning,
 * and mostly written ones at the end.
 */
struct htb_class {
	struct Qdisc_class_common common;
	struct psched_ratecfg	rate;
	struct psched_ratecfg	ceil;
	s64			buffer, cbuffer;/* token bucket depth/rate */
	s64			mbuffer;	/* max wait time */
	u32			prio;		/* these two are used only by leaves... */
	int			quantum;	/* but stored for parent-to-leaf return */

	struct tcf_proto __rcu	*filter_list;	/* class attached filters */
	struct tcf_block	*block;
	int			filter_cnt;

	int			level;		/* our level (see above) */
	unsigned int		children;
	struct htb_class	*parent;	/* parent class */

	struct net_rate_estimator __rcu *rate_est;

	/*
	 * Written often fields
	 */
	struct gnet_stats_basic_packed bstats;
	struct tc_htb_xstats	xstats;	/* our special stats */

	/* token bucket parameters */
	s64			tokens, ctokens;/* current number of tokens */
	s64			t_c;		/* checkpoint time */

	union {
		struct htb_class_leaf {
			int		deficit[TC_HTB_MAXDEPTH];
			struct Qdisc	*q;
		} leaf;
		struct htb_class_inner {
			struct htb_prio clprio[TC_HTB_NUMPRIO];
		} inner;
	};
	s64			pq_key;

	int			prio_activity;	/* for which prios are we active */
	enum htb_cmode		cmode;		/* current mode of the class */
	struct rb_node		pq_node;	/* node for event queue */
	struct rb_node		node[TC_HTB_NUMPRIO];	/* node for self or feed tree */

	unsigned int drops ____cacheline_aligned_in_smp;
	unsigned int		overlimits;
};

struct htb_level {
	struct rb_root	wait_pq;
	struct htb_prio hprio[TC_HTB_NUMPRIO];
};

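/* Layout sketch (illustrative): a class in HTB_CAN_SEND mode that is
 * active at level L and priority P is linked into
 * q->hlevel[L].hprio[P].row; a class in HTB_MAY_BORROW mode hangs off
 * parent->inner.clprio[P].feed instead, and a class waiting for a mode
 * change sits in q->hlevel[L].wait_pq keyed by cl->pq_key.
 */
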
struct htb_sched {
	struct Qdisc_class_hash clhash;
	int			defcls;		/* class where unclassified flows go to */
	int			rate2quantum;	/* quant = rate / rate2quantum */

	/* filters for qdisc itself */
	struct tcf_proto __rcu	*filter_list;
	struct tcf_block	*block;

#define HTB_WARN_TOOMANYEVENTS	0x1
	unsigned int		warned;	/* only one warning */
	int			direct_qlen;
	struct work_struct	work;

	/* non shaped skbs; let them go directly thru */
	struct qdisc_skb_head	direct_queue;
	u32			direct_pkts;
	u32			overlimits;

	struct qdisc_watchdog	watchdog;

	s64			now;	/* cached dequeue time */

	/* time of nearest event per level (row) */
	s64			near_ev_cache[TC_HTB_MAXDEPTH];

	int			row_mask[TC_HTB_MAXDEPTH];

	struct htb_level	hlevel[TC_HTB_MAXDEPTH];
};

/* find class in global hash table using given handle */
static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct Qdisc_class_common *clc;

	clc = qdisc_class_find(&q->clhash, handle);
	if (clc == NULL)
		return NULL;
	return container_of(clc, struct htb_class, common);
}

static unsigned long htb_search(struct Qdisc *sch, u32 handle)
{
	return (unsigned long)htb_find(handle, sch);
}
/**
 * htb_classify - classify a packet into class
 *
 * It returns NULL if the packet should be dropped or -1 if the packet
 * should be passed directly thru. In all other cases leaf class is returned.
 * We allow direct class selection by putting a classid into skb->priority.
 * Then we examine filters in qdisc and in inner nodes (if higher filter
 * points to the inner node). If we end up with classid MAJOR:0 we enqueue
 * the skb into the special internal fifo (direct). These packets then go
 * directly thru. If we still have no valid leaf we try to use the
 * MAJOR:default leaf. If still unsuccessful then finish and return the
 * direct queue.
 */
#define HTB_DIRECT ((struct htb_class *)-1L)

static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
				      int *qerr)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	struct tcf_result res;
	struct tcf_proto *tcf;
	int result;

	/* allow to select class by setting skb->priority to valid classid;
	 * note that nfmark can be used too by attaching filter fw with no
	 * rules in it
	 */
	if (skb->priority == sch->handle)
		return HTB_DIRECT;	/* X:0 (direct flow) selected */
	cl = htb_find(skb->priority, sch);
	if (cl) {
		if (cl->level == 0)
			return cl;
		/* Start with inner filter chain if a non-leaf class is selected */
		tcf = rcu_dereference_bh(cl->filter_list);
	} else {
		tcf = rcu_dereference_bh(q->filter_list);
	}

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	while (tcf && (result = tcf_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_QUEUED:
		case TC_ACT_STOLEN:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return NULL;
		}
#endif
		cl = (void *)res.class;
		if (!cl) {
			if (res.classid == sch->handle)
				return HTB_DIRECT;	/* X:0 (direct flow) */
			cl = htb_find(res.classid, sch);
			if (!cl)
				break;	/* filter selected invalid classid */
		}
		if (!cl->level)
			return cl;	/* we hit leaf; return it */

		/* we have got inner class; apply inner filter chain */
		tcf = rcu_dereference_bh(cl->filter_list);
	}
	/* classification failed; try to use default class */
	cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch);
	if (!cl || cl->level)
		return HTB_DIRECT;	/* bad default .. this is safe bet */
	return cl;
}
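
/* Classification sketch (illustrative; the device name and classids
 * are assumptions):
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20
 *	tc filter add dev eth0 parent 1: protocol ip u32 \
 *		match ip dport 22 0xffff flowid 1:10
 *
 * An skb matching the filter lands in leaf 1:10, a non-matching one in
 * the default leaf 1:20, and an skb whose skb->priority equals the
 * qdisc handle 1:0 bypasses shaping via HTB_DIRECT.
 */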

/**
 * htb_add_to_id_tree - adds class to the round robin list
 *
 * Routine adds class to the list (actually tree) sorted by classid.
 * Make sure that class is not already on such list for given prio.
 */
static void htb_add_to_id_tree(struct rb_root *root,
			       struct htb_class *cl, int prio)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, node[prio]);

		if (cl->common.classid > c->common.classid)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->node[prio], parent, p);
	rb_insert_color(&cl->node[prio], root);
}

/**
 * htb_add_to_wait_tree - adds class to the event queue with delay
 *
 * The class is added to priority event queue to indicate that class will
 * change its mode in cl->pq_key nanoseconds. Make sure that class is not
 * already in the queue.
 */
static void htb_add_to_wait_tree(struct htb_sched *q,
				 struct htb_class *cl, s64 delay)
{
	struct rb_node **p = &q->hlevel[cl->level].wait_pq.rb_node, *parent = NULL;

	cl->pq_key = q->now + delay;
	if (cl->pq_key == q->now)
		cl->pq_key++;

	/* update the nearest event cache */
	if (q->near_ev_cache[cl->level] > cl->pq_key)
		q->near_ev_cache[cl->level] = cl->pq_key;

	while (*p) {
		struct htb_class *c;
		parent = *p;
		c = rb_entry(parent, struct htb_class, pq_node);
		if (cl->pq_key >= c->pq_key)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}
	rb_link_node(&cl->pq_node, parent, p);
	rb_insert_color(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
}

/**
 * htb_next_rb_node - finds next node in binary tree
 *
 * When we are past last key we return NULL.
 * Average complexity is 2 steps per call.
 */
static inline void htb_next_rb_node(struct rb_node **n)
{
	*n = rb_next(*n);
}

/**
 * htb_add_class_to_row - add class to its row
 *
 * The class is added to row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_add_class_to_row(struct htb_sched *q,
					struct htb_class *cl, int mask)
{
	q->row_mask[cl->level] |= mask;
	while (mask) {
		int prio = ffz(~mask);
		mask &= ~(1 << prio);
		htb_add_to_id_tree(&q->hlevel[cl->level].hprio[prio].row, cl, prio);
	}
}

/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
	if (RB_EMPTY_NODE(rb)) {
		WARN_ON(1);
	} else {
		rb_erase(rb, root);
		RB_CLEAR_NODE(rb);
	}
}


/**
 * htb_remove_class_from_row - removes class from its row
 *
 * The class is removed from row at priorities marked in mask.
 * It does nothing if mask == 0.
 */
static inline void htb_remove_class_from_row(struct htb_sched *q,
					     struct htb_class *cl, int mask)
{
	int m = 0;
	struct htb_level *hlevel = &q->hlevel[cl->level];

	while (mask) {
		int prio = ffz(~mask);
		struct htb_prio *hprio = &hlevel->hprio[prio];

		mask &= ~(1 << prio);
		if (hprio->ptr == cl->node + prio)
			htb_next_rb_node(&hprio->ptr);

		htb_safe_rb_erase(cl->node + prio, &hprio->row);
		if (!hprio->row.rb_node)
			m |= 1 << prio;
	}
	q->row_mask[cl->level] &= ~m;
}

/**
 * htb_activate_prios - creates active class's feed chain
 *
 * The class is connected to ancestors and/or appropriate rows
 * for priorities it is participating in. cl->cmode must be the new
 * (activated) mode. It does nothing if cl->prio_activity == 0.
 */
static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].feed.rb_node)
				/* parent already has its feed in use so that
				 * reset bit in mask as parent is already ok
				 */
				mask &= ~(1 << prio);

			htb_add_to_id_tree(&p->inner.clprio[prio].feed, cl, prio);
		}
		p->prio_activity |= mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_add_class_to_row(q, cl, mask);
}
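
/* Propagation sketch (illustrative): when leaf 1:100, active at prio 2,
 * is in HTB_MAY_BORROW mode, the loop above links it into its parent's
 * inner.clprio[2].feed; if that parent may also only borrow, the walk
 * continues upward, and the first ancestor found in HTB_CAN_SEND mode
 * is placed into its level's row via htb_add_class_to_row().
 */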

/**
 * htb_deactivate_prios - remove class from feed chain
 *
 * cl->cmode must represent old mode (before deactivation). It does
 * nothing if cl->prio_activity == 0. Class is removed from all feed
 * chains and rows.
 */
static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
{
	struct htb_class *p = cl->parent;
	long m, mask = cl->prio_activity;

	while (cl->cmode == HTB_MAY_BORROW && p && mask) {
		m = mask;
		mask = 0;
		while (m) {
			int prio = ffz(~m);
			m &= ~(1 << prio);

			if (p->inner.clprio[prio].ptr == cl->node + prio) {
				/* we are removing child which is pointed to from
				 * parent feed - forget the pointer but remember
				 * classid
				 */
				p->inner.clprio[prio].last_ptr_id = cl->common.classid;
				p->inner.clprio[prio].ptr = NULL;
			}

			htb_safe_rb_erase(cl->node + prio,
					  &p->inner.clprio[prio].feed);

			if (!p->inner.clprio[prio].feed.rb_node)
				mask |= 1 << prio;
		}

		p->prio_activity &= ~mask;
		cl = p;
		p = cl->parent;

	}
	if (cl->cmode == HTB_CAN_SEND && mask)
		htb_remove_class_from_row(q, cl, mask);
}

static inline s64 htb_lowater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0;
	else
		return 0;
}
static inline s64 htb_hiwater(const struct htb_class *cl)
{
	if (htb_hysteresis)
		return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0;
	else
		return 0;
}


/**
 * htb_class_mode - computes and returns current class mode
 *
 * It computes cl's mode at time cl->t_c+diff and returns it. If mode
 * is not HTB_CAN_SEND then cl->pq_key is updated to time difference
 * from now to time when cl will change its state.
 * Also it is worth noting that class mode doesn't change simply
 * at cl->{c,}tokens == 0 but there can rather be hysteresis of
 * 0 .. -cl->{c,}buffer range. It is meant to limit number of
 * mode transitions per time unit. The speed gain is about 1/6.
 */
static inline enum htb_cmode
htb_class_mode(struct htb_class *cl, s64 *diff)
{
	s64 toks;

	if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) {
		*diff = -toks;
		return HTB_CANT_SEND;
	}

	if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl))
		return HTB_CAN_SEND;

	*diff = -toks;
	return HTB_MAY_BORROW;
}
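
/* Worked example (illustrative; hysteresis off, so both watermarks are
 * 0): with *diff = 0, cl->ctokens = +5ms and cl->tokens = -3ms worth of
 * nanoseconds, ctokens >= 0 rules out HTB_CANT_SEND and tokens < 0
 * rules out HTB_CAN_SEND, so the result is HTB_MAY_BORROW and *diff is
 * set to +3ms - the wait until the rate bucket refills to zero.
 */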

/**
 * htb_change_class_mode - changes class's mode
 *
 * This should be the only way to change a class's mode under normal
 * circumstances. Routine will update feed lists linkage, change mode
 * and add class to the wait event queue if appropriate. New mode should
 * be different from old one and cl->pq_key has to be valid if changing
 * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree).
 */
static void
htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff)
{
	enum htb_cmode new_mode = htb_class_mode(cl, diff);

	if (new_mode == cl->cmode)
		return;

	if (new_mode == HTB_CANT_SEND) {
		cl->overlimits++;
		q->overlimits++;
	}

	if (cl->prio_activity) {	/* not necessary: speed optimization */
		if (cl->cmode != HTB_CANT_SEND)
			htb_deactivate_prios(q, cl);
		cl->cmode = new_mode;
		if (new_mode != HTB_CANT_SEND)
			htb_activate_prios(q, cl);
	} else
		cl->cmode = new_mode;
}

/**
 * htb_activate - inserts leaf cl into appropriate active feeds
 *
 * Routine learns (new) priority of leaf and activates feed chain
 * for the prio. It can be called on already active leaf safely.
 */
static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(cl->level || !cl->leaf.q || !cl->leaf.q->q.qlen);

	if (!cl->prio_activity) {
		cl->prio_activity = 1 << cl->prio;
		htb_activate_prios(q, cl);
	}
}

/**
 * htb_deactivate - remove leaf cl from active feeds
 *
 * Make sure that leaf is active. In other words it can't be called
 * with non-active leaf.
 */
static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
{
	WARN_ON(!cl->prio_activity);

	htb_deactivate_prios(q, cl);
	cl->prio_activity = 0;
}

static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	int uninitialized_var(ret);
	unsigned int len = qdisc_pkt_len(skb);
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = htb_classify(skb, sch, &ret);

	if (cl == HTB_DIRECT) {
		/* enqueue to helper queue */
		if (q->direct_queue.qlen < q->direct_qlen) {
			__qdisc_enqueue_tail(skb, &q->direct_queue);
			q->direct_pkts++;
		} else {
			return qdisc_drop(skb, sch, to_free);
		}
#ifdef CONFIG_NET_CLS_ACT
	} else if (!cl) {
		if (ret & __NET_XMIT_BYPASS)
			qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return ret;
#endif
	} else if ((ret = qdisc_enqueue(skb, cl->leaf.q,
					to_free)) != NET_XMIT_SUCCESS) {
		if (net_xmit_drop_count(ret)) {
			qdisc_qstats_drop(sch);
			cl->drops++;
		}
		return ret;
	} else {
		htb_activate(q, cl);
	}

	sch->qstats.backlog += len;
	sch->q.qlen++;
	return NET_XMIT_SUCCESS;
}

static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->tokens;

	if (toks > cl->buffer)
		toks = cl->buffer;
	toks -= (s64) psched_l2t_ns(&cl->rate, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->tokens = toks;
}

static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, s64 diff)
{
	s64 toks = diff + cl->ctokens;

	if (toks > cl->cbuffer)
		toks = cl->cbuffer;
	toks -= (s64) psched_l2t_ns(&cl->ceil, bytes);
	if (toks <= -cl->mbuffer)
		toks = 1 - cl->mbuffer;

	cl->ctokens = toks;
}
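
/* Worked example (illustrative): at rate 1Mbit (125000 bytes/sec),
 * psched_l2t_ns() prices a 1500 byte packet at 12ms of tokens. If the
 * class idled for diff = 50ms but cl->buffer is only 20ms, toks is
 * first clamped to 20ms, so the dequeue leaves cl->tokens at 8ms; the
 * -cl->mbuffer clamp bounds the debt the same way in the other
 * direction.
 */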

/**
 * htb_charge_class - charges amount "bytes" to leaf and ancestors
 *
 * Routine assumes that packet "bytes" long was dequeued from leaf cl
 * borrowing from "level". It accounts bytes to ceil leaky bucket for
 * leaf and all ancestors and to rate bucket for ancestors at levels
 * "level" and higher. It also handles possible change of mode resulting
 * from the update. Note that mode can also increase here (MAY_BORROW to
 * CAN_SEND) because we can use more precise clock than event queue here.
 * In such case we remove class from event queue first.
 */
static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
			     int level, struct sk_buff *skb)
{
	int bytes = qdisc_pkt_len(skb);
	enum htb_cmode old_mode;
	s64 diff;

	while (cl) {
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		if (cl->level >= level) {
			if (cl->level == level)
				cl->xstats.lends++;
			htb_accnt_tokens(cl, bytes, diff);
		} else {
			cl->xstats.borrows++;
			cl->tokens += diff;	/* we moved t_c; update tokens */
		}
		htb_accnt_ctokens(cl, bytes, diff);
		cl->t_c = q->now;

		old_mode = cl->cmode;
		diff = 0;
		htb_change_class_mode(q, cl, &diff);
		if (old_mode != cl->cmode) {
			if (old_mode != HTB_CAN_SEND)
				htb_safe_rb_erase(&cl->pq_node, &q->hlevel[cl->level].wait_pq);
			if (cl->cmode != HTB_CAN_SEND)
				htb_add_to_wait_tree(q, cl, diff);
		}

		/* update basic stats except for leaves which are already updated */
		if (cl->level)
			bstats_update(&cl->bstats, skb);

		cl = cl->parent;
	}
}

/**
 * htb_do_events - make mode changes to classes at the level
 *
 * Scans event queue for pending events and applies them. Returns time of
 * next pending event (0 for no event in pq, q->now for too many events).
 * Note: Applied are events whose cl->pq_key <= q->now.
 */
static s64 htb_do_events(struct htb_sched *q, const int level,
			 unsigned long start)
{
	/* don't run for longer than 2 jiffies; 2 is used instead of
	 * 1 to simplify things when jiffy is going to be incremented
	 * too soon
	 */
	unsigned long stop_at = start + 2;
	struct rb_root *wait_pq = &q->hlevel[level].wait_pq;

	while (time_before(jiffies, stop_at)) {
		struct htb_class *cl;
		s64 diff;
		struct rb_node *p = rb_first(wait_pq);

		if (!p)
			return 0;

		cl = rb_entry(p, struct htb_class, pq_node);
		if (cl->pq_key > q->now)
			return cl->pq_key;

		htb_safe_rb_erase(p, wait_pq);
		diff = min_t(s64, q->now - cl->t_c, cl->mbuffer);
		htb_change_class_mode(q, cl, &diff);
		if (cl->cmode != HTB_CAN_SEND)
			htb_add_to_wait_tree(q, cl, diff);
	}

	/* too much load - let's continue after a break for scheduling */
	if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
		pr_warn("htb: too many events!\n");
		q->warned |= HTB_WARN_TOOMANYEVENTS;
	}

	return q->now;
}

/* Returns class->node+prio from id-tree where class's id is >= id. NULL
 * if no such one exists.
 */
static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
					      u32 id)
{
	struct rb_node *r = NULL;
	while (n) {
		struct htb_class *cl =
			rb_entry(n, struct htb_class, node[prio]);

		if (id > cl->common.classid) {
			n = n->rb_right;
		} else if (id < cl->common.classid) {
			r = n;
			n = n->rb_left;
		} else {
			return n;
		}
	}
	return r;
}

/**
 * htb_lookup_leaf - returns next leaf class in DRR order
 *
 * Find leaf where current feed pointer points to.
 */
static struct htb_class *htb_lookup_leaf(struct htb_prio *hprio, const int prio)
{
	int i;
	struct {
		struct rb_node *root;
		struct rb_node **pptr;
		u32 *pid;
	} stk[TC_HTB_MAXDEPTH], *sp = stk;

	BUG_ON(!hprio->row.rb_node);
	sp->root = hprio->row.rb_node;
	sp->pptr = &hprio->ptr;
	sp->pid = &hprio->last_ptr_id;

	for (i = 0; i < 65535; i++) {
		if (!*sp->pptr && *sp->pid) {
			/* ptr was invalidated but id is valid - try to recover
			 * the original or next ptr
			 */
			*sp->pptr =
				htb_id_find_next_upper(prio, sp->root, *sp->pid);
		}
		*sp->pid = 0;	/* ptr is valid now so remove this hint as it
				 * can become out of date quickly
				 */
		if (!*sp->pptr) {	/* we are at right end; rewind & go up */
			*sp->pptr = sp->root;
			while ((*sp->pptr)->rb_left)
				*sp->pptr = (*sp->pptr)->rb_left;
			if (sp > stk) {
				sp--;
				if (!*sp->pptr) {
					WARN_ON(1);
					return NULL;
				}
				htb_next_rb_node(sp->pptr);
			}
		} else {
			struct htb_class *cl;
			struct htb_prio *clp;

			cl = rb_entry(*sp->pptr, struct htb_class, node[prio]);
			if (!cl->level)
				return cl;
			clp = &cl->inner.clprio[prio];
			(++sp)->root = clp->feed.rb_node;
			sp->pptr = &clp->ptr;
			sp->pid = &clp->last_ptr_id;
		}
	}
	WARN_ON(1);
	return NULL;
}

/* dequeues packet at given priority and level; call only if
 * you are sure that there is active class at prio/level
 */
static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, const int prio,
					const int level)
{
	struct sk_buff *skb = NULL;
	struct htb_class *cl, *start;
	struct htb_level *hlevel = &q->hlevel[level];
	struct htb_prio *hprio = &hlevel->hprio[prio];

	/* look initial class up in the row */
	start = cl = htb_lookup_leaf(hprio, prio);

	do {
next:
		if (unlikely(!cl))
			return NULL;

		/* class can be empty - it is unlikely but can be true if leaf
		 * qdisc drops packets in enqueue routine or if someone used
		 * graft operation on the leaf since last dequeue;
		 * simply deactivate and skip such class
		 */
		if (unlikely(cl->leaf.q->q.qlen == 0)) {
			struct htb_class *next;
			htb_deactivate(q, cl);

			/* row/level might become empty */
			if ((q->row_mask[level] & (1 << prio)) == 0)
				return NULL;

			next = htb_lookup_leaf(hprio, prio);

			if (cl == start)	/* fix start if we just deleted it */
				start = next;
			cl = next;
			goto next;
		}

		skb = cl->leaf.q->dequeue(cl->leaf.q);
		if (likely(skb != NULL))
			break;

		qdisc_warn_nonwc("htb", cl->leaf.q);
		htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr:
					 &q->hlevel[0].hprio[prio].ptr);
		cl = htb_lookup_leaf(hprio, prio);

	} while (cl != start);

	if (likely(skb != NULL)) {
		bstats_update(&cl->bstats, skb);
		cl->leaf.deficit[level] -= qdisc_pkt_len(skb);
		if (cl->leaf.deficit[level] < 0) {
			cl->leaf.deficit[level] += cl->quantum;
			htb_next_rb_node(level ? &cl->parent->inner.clprio[prio].ptr :
						 &q->hlevel[0].hprio[prio].ptr);
		}
		/* this used to be after charge_class but this constellation
		 * gives us slightly better performance
		 */
		if (!cl->leaf.q->q.qlen)
			htb_deactivate(q, cl);
		htb_charge_class(q, cl, level, skb);
	}
	return skb;
}
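
/* DRR sketch (illustrative): with cl->quantum = 1500 and a stream of
 * 1000 byte packets, the deficit goes 0 -> -1000 on the first dequeue,
 * the quantum tops it back up to 500 and the round-robin pointer
 * advances, so the class sends one packet per round; a sibling with
 * twice the quantum gets roughly twice the bytes per round.
 */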

static struct sk_buff *htb_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct htb_sched *q = qdisc_priv(sch);
	int level;
	s64 next_event;
	unsigned long start_at;

	/* try to dequeue direct packets as high prio (!) to minimize cpu work */
	skb = __qdisc_dequeue_head(&q->direct_queue);
	if (skb != NULL) {
ok:
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
		return skb;
	}

	if (!sch->q.qlen)
		goto fin;
	q->now = ktime_get_ns();
	start_at = jiffies;

	next_event = q->now + 5LLU * NSEC_PER_SEC;

	for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
		/* common case optimization - skip event handler quickly */
		int m;
		s64 event = q->near_ev_cache[level];

		if (q->now >= event) {
			event = htb_do_events(q, level, start_at);
			if (!event)
				event = q->now + NSEC_PER_SEC;
			q->near_ev_cache[level] = event;
		}

		if (next_event > event)
			next_event = event;

		m = ~q->row_mask[level];
		while (m != (int)(-1)) {
			int prio = ffz(m);

			m |= 1 << prio;
			skb = htb_dequeue_tree(q, prio, level);
			if (likely(skb != NULL))
				goto ok;
		}
	}
	if (likely(next_event > q->now))
		qdisc_watchdog_schedule_ns(&q->watchdog, next_event);
	else
		schedule_work(&q->work);
fin:
	return skb;
}
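
/* Ordering sketch (illustrative): direct-queue packets always win;
 * then level 0 (leaves sending within their own rate) is scanned before
 * higher levels (classes that can only send by borrowing), and within a
 * level ffz() picks the numerically lowest active prio first.
 */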

/* reset all classes */
/* always called under BH & queue lock */
static void htb_reset(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (cl->level)
				memset(&cl->inner, 0, sizeof(cl->inner));
			else {
				if (cl->leaf.q)
					qdisc_reset(cl->leaf.q);
			}
			cl->prio_activity = 0;
			cl->cmode = HTB_CAN_SEND;
		}
	}
	qdisc_watchdog_cancel(&q->watchdog);
	__qdisc_reset_queue(&q->direct_queue);
	sch->q.qlen = 0;
	sch->qstats.backlog = 0;
	memset(q->hlevel, 0, sizeof(q->hlevel));
	memset(q->row_mask, 0, sizeof(q->row_mask));
}

static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
	[TCA_HTB_PARMS]	= { .len = sizeof(struct tc_htb_opt) },
	[TCA_HTB_INIT]	= { .len = sizeof(struct tc_htb_glob) },
	[TCA_HTB_CTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_HTB_DIRECT_QLEN] = { .type = NLA_U32 },
	[TCA_HTB_RATE64] = { .type = NLA_U64 },
	[TCA_HTB_CEIL64] = { .type = NLA_U64 },
};

static void htb_work_func(struct work_struct *work)
{
	struct htb_sched *q = container_of(work, struct htb_sched, work);
	struct Qdisc *sch = q->watchdog.qdisc;

	rcu_read_lock();
	__netif_schedule(qdisc_root(sch));
	rcu_read_unlock();
}

static int htb_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_glob *gopt;
	int err;

	qdisc_watchdog_init(&q->watchdog, sch);
	INIT_WORK(&q->work, htb_work_func);

	if (!opt)
		return -EINVAL;

	err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
	if (err)
		return err;

	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
					  NULL);
	if (err < 0)
		return err;

	if (!tb[TCA_HTB_INIT])
		return -EINVAL;

	gopt = nla_data(tb[TCA_HTB_INIT]);
	if (gopt->version != HTB_VER >> 16)
		return -EINVAL;

	err = qdisc_class_hash_init(&q->clhash);
	if (err < 0)
		return err;

	qdisc_skb_head_init(&q->direct_queue);

	if (tb[TCA_HTB_DIRECT_QLEN])
		q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
	else
		q->direct_qlen = qdisc_dev(sch)->tx_queue_len;

	if ((q->rate2quantum = gopt->rate2quantum) < 1)
		q->rate2quantum = 1;
	q->defcls = gopt->defcls;

	return 0;
}
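
/* Setup sketch (illustrative; the device name is an assumption) - the
 * options parsed above correspond to
 *
 *	tc qdisc add dev eth0 root handle 1: htb default 20 r2q 10
 *
 * where "default 20" becomes q->defcls, "r2q 10" becomes
 * q->rate2quantum and an optional "direct_qlen N" overrides the
 * tx_queue_len fallback.
 */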

static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_htb_glob gopt;

	sch->qstats.overlimits = q->overlimits;
	/* It's safe to not acquire qdisc lock. As we hold RTNL,
	 * no change can happen on the qdisc parameters.
	 */

	gopt.direct_pkts = q->direct_pkts;
	gopt.version = HTB_VER;
	gopt.rate2quantum = q->rate2quantum;
	gopt.defcls = q->defcls;
	gopt.debug = 0;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct htb_class *cl = (struct htb_class *)arg;
	struct nlattr *nest;
	struct tc_htb_opt opt;

	/* It's safe to not acquire qdisc lock. As we hold RTNL,
	 * no change can happen on the class parameters.
	 */
	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
	tcm->tcm_handle = cl->common.classid;
	if (!cl->level && cl->leaf.q)
		tcm->tcm_info = cl->leaf.q->handle;

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	memset(&opt, 0, sizeof(opt));

	psched_ratecfg_getrate(&opt.rate, &cl->rate);
	opt.buffer = PSCHED_NS2TICKS(cl->buffer);
	psched_ratecfg_getrate(&opt.ceil, &cl->ceil);
	opt.cbuffer = PSCHED_NS2TICKS(cl->cbuffer);
	opt.quantum = cl->quantum;
	opt.prio = cl->prio;
	opt.level = cl->level;
	if (nla_put(skb, TCA_HTB_PARMS, sizeof(opt), &opt))
		goto nla_put_failure;
	if ((cl->rate.rate_bytes_ps >= (1ULL << 32)) &&
	    nla_put_u64_64bit(skb, TCA_HTB_RATE64, cl->rate.rate_bytes_ps,
			      TCA_HTB_PAD))
		goto nla_put_failure;
	if ((cl->ceil.rate_bytes_ps >= (1ULL << 32)) &&
	    nla_put_u64_64bit(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps,
			      TCA_HTB_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int
htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
{
	struct htb_class *cl = (struct htb_class *)arg;
	struct gnet_stats_queue qs = {
		.drops = cl->drops,
		.overlimits = cl->overlimits,
	};
	__u32 qlen = 0;

	if (!cl->level && cl->leaf.q)
		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);

	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
				    INT_MIN, INT_MAX);
	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
				     INT_MIN, INT_MAX);

	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
				  d, NULL, &cl->bstats) < 0 ||
	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
	    gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
		return -1;

	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
}

static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl->level)
		return -EINVAL;
	if (new == NULL &&
	    (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
				     cl->common.classid, extack)) == NULL)
		return -ENOBUFS;

	*old = qdisc_replace(sch, new, &cl->leaf.q);
	return 0;
}

static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;
	return !cl->level ? cl->leaf.q : NULL;
}

static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	htb_deactivate(qdisc_priv(sch), cl);
}

static inline int htb_parent_last_child(struct htb_class *cl)
{
	if (!cl->parent)
		/* the root class */
		return 0;
	if (cl->parent->children > 1)
		/* not the last child */
		return 0;
	return 1;
}

static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
			       struct Qdisc *new_q)
{
	struct htb_class *parent = cl->parent;

	WARN_ON(cl->level || !cl->leaf.q || cl->prio_activity);

	if (parent->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&parent->pq_node,
				  &q->hlevel[parent->level].wait_pq);

	parent->level = 0;
	memset(&parent->inner, 0, sizeof(parent->inner));
	parent->leaf.q = new_q ? new_q : &noop_qdisc;
	parent->tokens = parent->buffer;
	parent->ctokens = parent->cbuffer;
	parent->t_c = ktime_get_ns();
	parent->cmode = HTB_CAN_SEND;
}

static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
{
	if (!cl->level) {
		WARN_ON(!cl->leaf.q);
		qdisc_put(cl->leaf.q);
	}
	gen_kill_estimator(&cl->rate_est);
	tcf_block_put(cl->block);
	kfree(cl);
}

static void htb_destroy(struct Qdisc *sch)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct hlist_node *next;
	struct htb_class *cl;
	unsigned int i;

	cancel_work_sync(&q->work);
	qdisc_watchdog_cancel(&q->watchdog);
	/* This line used to be after htb_destroy_class call below
	 * and surprisingly it worked in 2.4. But it must precede it
	 * because filters need their target class alive to be able to
	 * call unbind_filter on it (without Oops).
	 */
	tcf_block_put(q->block);

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			tcf_block_put(cl->block);
			cl->block = NULL;
		}
	}
	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
					  common.hnode)
			htb_destroy_class(sch, cl);
	}
	qdisc_class_hash_destroy(&q->clhash);
	__qdisc_reset_queue(&q->direct_queue);
}

static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;
	struct Qdisc *new_q = NULL;
	int last_child = 0;

	/* TODO: why don't we allow deleting a subtree? references? does
	 * the tc subsystem guarantee that htb_destroy is entered with no
	 * class refs held, so that children could be removed safely there?
	 */
	if (cl->children || cl->filter_cnt)
		return -EBUSY;

	if (!cl->level && htb_parent_last_child(cl)) {
		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					  cl->parent->common.classid,
					  NULL);
		last_child = 1;
	}

	sch_tree_lock(sch);

	if (!cl->level)
		qdisc_purge_queue(cl->leaf.q);

	/* delete from hash and active; remainder in destroy_class */
	qdisc_class_hash_remove(&q->clhash, &cl->common);
	if (cl->parent)
		cl->parent->children--;

	if (cl->prio_activity)
		htb_deactivate(q, cl);

	if (cl->cmode != HTB_CAN_SEND)
		htb_safe_rb_erase(&cl->pq_node,
				  &q->hlevel[cl->level].wait_pq);

	if (last_child)
		htb_parent_to_leaf(q, cl, new_q);

	sch_tree_unlock(sch);

	htb_destroy_class(sch, cl);
	return 0;
}

static int htb_change_class(struct Qdisc *sch, u32 classid,
			    u32 parentid, struct nlattr **tca,
			    unsigned long *arg, struct netlink_ext_ack *extack)
{
	int err = -EINVAL;
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)*arg, *parent;
	struct nlattr *opt = tca[TCA_OPTIONS];
	struct nlattr *tb[TCA_HTB_MAX + 1];
	struct tc_htb_opt *hopt;
	u64 rate64, ceil64;
	int warn = 0;

	/* extract all subattrs from opt attr */
	if (!opt)
		goto failure;

	err = nla_parse_nested_deprecated(tb, TCA_HTB_MAX, opt, htb_policy,
					  NULL);
	if (err < 0)
		goto failure;

	err = -EINVAL;
	if (tb[TCA_HTB_PARMS] == NULL)
		goto failure;

	parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch);

	hopt = nla_data(tb[TCA_HTB_PARMS]);
	if (!hopt->rate.rate || !hopt->ceil.rate)
		goto failure;

	/* Keep backward compatibility with rate_table-based iproute2 tc */
	if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB],
					      NULL));

	if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE)
		qdisc_put_rtab(qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB],
					      NULL));

	if (!cl) {		/* new class */
		struct Qdisc *new_q;
		int prio;
		struct {
			struct nlattr		nla;
			struct gnet_estimator	opt;
		} est = {
			.nla = {
				.nla_len	= nla_attr_size(sizeof(est.opt)),
				.nla_type	= TCA_RATE,
			},
			.opt = {
				/* 4s interval, 16s averaging constant */
				.interval	= 2,
				.ewma_log	= 2,
			},
		};

		/* check for valid classid */
		if (!classid || TC_H_MAJ(classid ^ sch->handle) ||
		    htb_find(classid, sch))
			goto failure;

		/* check maximal depth */
		if (parent && parent->parent && parent->parent->level < 2) {
			pr_err("htb: tree is too deep\n");
			goto failure;
		}
		err = -ENOBUFS;
		cl = kzalloc(sizeof(*cl), GFP_KERNEL);
		if (!cl)
			goto failure;

		err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
		if (err) {
			kfree(cl);
			goto failure;
		}
		if (htb_rate_est || tca[TCA_RATE]) {
			err = gen_new_estimator(&cl->bstats, NULL,
						&cl->rate_est,
						NULL,
						qdisc_root_sleeping_running(sch),
						tca[TCA_RATE] ? : &est.nla);
			if (err) {
				tcf_block_put(cl->block);
				kfree(cl);
				goto failure;
			}
		}

		cl->children = 0;
		RB_CLEAR_NODE(&cl->pq_node);

		for (prio = 0; prio < TC_HTB_NUMPRIO; prio++)
			RB_CLEAR_NODE(&cl->node[prio]);

		/* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
		 * so it can't be used inside of sch_tree_lock
		 * -- thanks to Karlis Peisenieks
		 */
		new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
					  classid, NULL);
		sch_tree_lock(sch);
		if (parent && !parent->level) {
			/* turn parent into inner node */
			qdisc_purge_queue(parent->leaf.q);
			qdisc_put(parent->leaf.q);
			if (parent->prio_activity)
				htb_deactivate(q, parent);

			/* remove from evt list because of level change */
			if (parent->cmode != HTB_CAN_SEND) {
				htb_safe_rb_erase(&parent->pq_node, &q->hlevel[0].wait_pq);
				parent->cmode = HTB_CAN_SEND;
			}
			parent->level = (parent->parent ? parent->parent->level
					 : TC_HTB_MAXDEPTH) - 1;
			memset(&parent->inner, 0, sizeof(parent->inner));
		}
		/* leaf (we) needs elementary qdisc */
		cl->leaf.q = new_q ? new_q : &noop_qdisc;

		cl->common.classid = classid;
		cl->parent = parent;

		/* set class to be in HTB_CAN_SEND state */
		cl->tokens = PSCHED_TICKS2NS(hopt->buffer);
		cl->ctokens = PSCHED_TICKS2NS(hopt->cbuffer);
		cl->mbuffer = 60ULL * NSEC_PER_SEC;	/* 1min */
		cl->t_c = ktime_get_ns();
		cl->cmode = HTB_CAN_SEND;

		/* attach to the hash list and parent's family */
		qdisc_class_hash_insert(&q->clhash, &cl->common);
		if (parent)
			parent->children++;
		if (cl->leaf.q != &noop_qdisc)
			qdisc_hash_add(cl->leaf.q, true);
	} else {
		if (tca[TCA_RATE]) {
			err = gen_replace_estimator(&cl->bstats, NULL,
						    &cl->rate_est,
						    NULL,
						    qdisc_root_sleeping_running(sch),
						    tca[TCA_RATE]);
			if (err)
				return err;
		}
		sch_tree_lock(sch);
	}

	rate64 = tb[TCA_HTB_RATE64] ? nla_get_u64(tb[TCA_HTB_RATE64]) : 0;

	ceil64 = tb[TCA_HTB_CEIL64] ? nla_get_u64(tb[TCA_HTB_CEIL64]) : 0;

	psched_ratecfg_precompute(&cl->rate, &hopt->rate, rate64);
	psched_ratecfg_precompute(&cl->ceil, &hopt->ceil, ceil64);

	/* it used to be a nasty bug here, we have to check that node
	 * is really a leaf before changing cl->leaf !
	 */
	if (!cl->level) {
		u64 quantum = cl->rate.rate_bytes_ps;

		do_div(quantum, q->rate2quantum);
		cl->quantum = min_t(u64, quantum, INT_MAX);

		if (!hopt->quantum && cl->quantum < 1000) {
			warn = -1;
			cl->quantum = 1000;
		}
		if (!hopt->quantum && cl->quantum > 200000) {
			warn = 1;
			cl->quantum = 200000;
		}
		if (hopt->quantum)
			cl->quantum = hopt->quantum;
		if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
			cl->prio = TC_HTB_NUMPRIO - 1;
	}

	cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
	cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);

	sch_tree_unlock(sch);

	if (warn)
		pr_warn("HTB: quantum of class %X is %s. Consider r2q change.\n",
			cl->common.classid, (warn == -1 ? "small" : "big"));

	qdisc_class_hash_grow(sch, &q->clhash);

	*arg = (unsigned long)cl;
	return 0;

failure:
	return err;
}
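
/* Setup sketch (illustrative; device and classids are assumptions):
 *
 *	tc class add dev eth0 parent 1: classid 1:1 htb rate 100mbit
 *	tc class add dev eth0 parent 1:1 classid 1:20 htb rate 10mbit \
 *		ceil 100mbit burst 15k prio 1
 *
 * rate/ceil feed psched_ratecfg_precompute() above, burst becomes
 * cl->buffer via PSCHED_TICKS2NS(), and with no explicit quantum the
 * leaf gets rate_bytes_ps / r2q clamped to the [1000, 200000] window.
 */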

static struct tcf_block *htb_tcf_block(struct Qdisc *sch, unsigned long arg,
				       struct netlink_ext_ack *extack)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl = (struct htb_class *)arg;

	return cl ? cl->block : q->block;
}

static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
				     u32 classid)
{
	struct htb_class *cl = htb_find(classid, sch);

	/*if (cl && !cl->level) return 0;
	 * The line above used to be there to prevent attaching filters to
	 * leaves. But at least the tc_index filter uses this just to get a
	 * class for other reasons, so we have to allow it.
	 * ----
	 * 19.6.2002 As Werner explained it is ok - bind filter is just
	 * another way to "lock" the class - unlike "get" this lock can
	 * be broken by class during destroy IIUC.
	 */
	if (cl)
		cl->filter_cnt++;
	return (unsigned long)cl;
}

static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg)
{
	struct htb_class *cl = (struct htb_class *)arg;

	if (cl)
		cl->filter_cnt--;
}

static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct htb_sched *q = qdisc_priv(sch);
	struct htb_class *cl;
	unsigned int i;

	if (arg->stop)
		return;

	for (i = 0; i < q->clhash.hashsize; i++) {
		hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
			if (arg->count < arg->skip) {
				arg->count++;
				continue;
			}
			if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
				arg->stop = 1;
				return;
			}
			arg->count++;
		}
	}
}

static const struct Qdisc_class_ops htb_class_ops = {
	.graft		=	htb_graft,
	.leaf		=	htb_leaf,
	.qlen_notify	=	htb_qlen_notify,
	.find		=	htb_search,
	.change		=	htb_change_class,
	.delete		=	htb_delete,
	.walk		=	htb_walk,
	.tcf_block	=	htb_tcf_block,
	.bind_tcf	=	htb_bind_filter,
	.unbind_tcf	=	htb_unbind_filter,
	.dump		=	htb_dump_class,
	.dump_stats	=	htb_dump_class_stats,
};

static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
	.cl_ops		=	&htb_class_ops,
	.id		=	"htb",
	.priv_size	=	sizeof(struct htb_sched),
	.enqueue	=	htb_enqueue,
	.dequeue	=	htb_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	htb_init,
	.reset		=	htb_reset,
	.destroy	=	htb_destroy,
	.dump		=	htb_dump,
	.owner		=	THIS_MODULE,
};

static int __init htb_module_init(void)
{
	return register_qdisc(&htb_qdisc_ops);
}
static void __exit htb_module_exit(void)
{
	unregister_qdisc(&htb_qdisc_ops);
}

module_init(htb_module_init)
module_exit(htb_module_exit)
MODULE_LICENSE("GPL");