// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length; should be chosen > qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than RAM size).
	Really, this limit will never be reached
	if RED works correctly.
 */

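/* Editorial note: all threshold comparisons below are made against an
 * exponentially weighted moving average of the backlog, not the raw
 * queue length. Roughly (see include/net/red.h for the exact
 * fixed-point arithmetic):
 *
 *	qavg = qavg + W * (backlog - qavg),	where W = 2^(-Wlog)
 *
 * A typical userspace configuration (values illustrative only) is:
 *
 *	tc qdisc add dev eth0 root red limit 400000 min 30000 \
 *		max 90000 avpkt 1000 burst 55 ecn adaptive \
 *		bandwidth 10Mbit probability 0.1
 */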
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */

	unsigned char		flags;
	/* Non-flags in tc_red_qopt.flags. */
	unsigned char		userbits;

	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP;

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_NODROP;
}

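/* Enqueue path: update the average queue length, then let red_action()
 * pick one of three outcomes. RED_DONT_MARK queues the packet normally;
 * RED_PROB_MARK fires with the computed probability between qth_min and
 * qth_max; RED_HARD_MARK fires once the average exceeds qth_max. In both
 * marking cases an ECN-capable packet is CE-marked instead of dropped,
 * and in nodrop mode even non-ECT packets are queued rather than dropped.
 */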
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.prob_mark++;
		} else if (!red_use_nodrop(q)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		if (INET_ECN_set_ce(skb)) {
			q->stats.forced_mark++;
		} else if (!red_use_nodrop(q)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		/* Non-ECT packet in ECN nodrop mode: queue it. */
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

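/* When the child queue runs empty the qdisc goes idle; record the start
 * of the idle period so red_calc_qavg() can decay the average correctly
 * once packets arrive again.
 */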
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

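/* Mirror the software configuration into hardware where the device can
 * offload RED: TC_RED_REPLACE pushes the current thresholds and flags
 * down through ndo_setup_tc(), TC_RED_DESTROY tears the hardware state
 * back down.
 */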
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.is_nodrop = red_use_nodrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
	[TCA_RED_PARMS]	 = { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	 = { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P]	 = { .type = NLA_U32 },
	[TCA_RED_FLAGS]	 = { .type = NLA_BITFIELD32,
			     .validation_data = &red_supported_flags },
};

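/* Parse and apply a new configuration. A non-zero limit allocates a
 * bfifo child qdisc of that size; the actual parameter switch-over
 * happens under the qdisc tree lock so the enqueue/dequeue paths never
 * observe a half-applied configuration.
 */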
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct nla_bitfield32 flags_bf;
	struct tc_red_qopt *ctl;
	unsigned char userbits;
	unsigned char flags;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
			    tb[TCA_RED_FLAGS], red_supported_flags,
			    &flags_bf, &userbits, extack);
	if (err)
		return err;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);

	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
	err = red_validate_flags(flags, extack);
	if (err)
		goto unlock_out;

	q->flags = flags;
	q->userbits = userbits;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;

unlock_out:
	sch_tree_unlock(sch);
	if (child)
		qdisc_put(child);
	return err;
}

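/* Adaptive RED (the historical "adaptative" spelling matches the
 * TC_RED_ADAPTATIVE UAPI flag): every 500ms, nudge max_P up or down so
 * that the average queue length roughly settles in the band between
 * qth_min and qth_max. See red_adaptative_algo() in include/net/red.h.
 */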
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

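/* Pull byte/packet counters back from the hardware so a dump reflects
 * offloaded traffic too; qdisc_offload_dump_helper() should also keep
 * the TCQ_F_OFFLOADED flag (checked in red_dump_stats()) up to date.
 */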
static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nla_bitfield32 flags_bf = {
		.selector = red_supported_flags,
		.value = q->flags,
	};
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
				  q->userbits,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
	    nla_put(skb, TCA_RED_FLAGS, sizeof(flags_bf), &flags_bf))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

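/* RED exposes a single pseudo-class (minor 1), bound to the child
 * qdisc, so userspace can inspect it and graft a replacement under it.
 */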
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		= red_graft,
	.leaf		= red_leaf,
	.find		= red_find,
	.walk		= red_walk,
	.dump		= red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		= "red",
	.priv_size	= sizeof(struct red_sched_data),
	.cl_ops		= &red_class_ops,
	.enqueue	= red_enqueue,
	.dequeue	= red_dequeue,
	.peek		= red_peek,
	.init		= red_init,
	.reset		= red_reset,
	.destroy	= red_destroy,
	.change		= red_change,
	.dump		= red_dump,
	.dump_stats	= red_dump_stats,
	.owner		= THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");