net: sched: Allow extending set of supported RED flags
[linux-block.git] / net / sched / sch_red.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816: ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */
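/* Purely illustrative (example numbers, not taken from this file): with
 * qth_min = 30000 bytes, qth_max = 90000 bytes, an average packet of
 * 1000 bytes and a burst allowance of 55 packets, qth_max + burst is
 * 90000 + 55 * 1000 = 145000 bytes, so a limit of, say, 400000 bytes
 * comfortably satisfies limit > qth_max + burst.
 */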

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

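/* Flag bits that may be set through the TCA_RED_FLAGS bitfield32
 * attribute; for now only the historic flags are accepted. The same mask
 * is used as the selector when dumping, so userspace can also see which
 * flags this kernel supports.
 */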
static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS;

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

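/* Push the current RED configuration to the device via
 * ndo_setup_tc(TC_SETUP_QDISC_RED): TC_RED_REPLACE installs or updates
 * the parameters (thresholds descaled by Wlog, max_P, limit and the
 * ECN/harddrop bits), TC_RED_DESTROY removes the offloaded instance.
 * Returns -EOPNOTSUPP when the device cannot offload the qdisc.
 */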
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
	[TCA_RED_FLAGS] = { .type = NLA_BITFIELD32,
			    .validation_data = &red_supported_flags },
};

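/* Flags can arrive in two places: the historic bits inside
 * tc_red_qopt.flags and the newer TCA_RED_FLAGS bitfield32 attribute.
 * red_get_flags() folds both sources into a selector/value pair plus the
 * non-flag "userbits"; the result is merged into q->flags and checked
 * with red_validate_flags() under the tree lock before being committed.
 */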
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], red_supported_flags,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

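/* Adaptive RED: while TC_RED_ADAPTATIVE is set, this timer reruns
 * red_adaptative_algo() roughly every 500 ms (HZ / 2) under the root
 * qdisc lock to retune max_P.
 */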
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf = {
		.selector = red_supported_flags,
		.value = q->flags,
	};
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put(skb, TCA_RED_FLAGS, sizeof(flags_bf), &flags_bf))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
1da177e4 493MODULE_LICENSE("GPL");