// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size)
	Really, this limit will never be reached
	if RED works correctly.
 */

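/*
 * Background: the average queue size used throughout this file is the
 * classic RED EWMA of Floyd & Jacobson (1993),
 * avg = (1 - W) * avg + W * backlog, with W = 2^-Wlog; the fixed-point
 * arithmetic implementing it lives in <net/red.h>.
 */
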
struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

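/*
 * Enqueue: update the EWMA average queue length, then let the RED state
 * machine decide whether to pass the packet through unmarked, ECN-mark
 * it, or drop it, depending on where the average falls relative to the
 * qth_min/qth_max thresholds.
 */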
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

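/*
 * Dequeue from the child qdisc; when the queue drains empty, start the
 * RED "idle period" so the average queue estimate decays while the link
 * sits idle (see Alexey Makarenko's 990814 fix noted in the header).
 */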
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

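/*
 * Mirror the qdisc configuration into hardware, if the device supports
 * offload via ndo_setup_tc(): TC_RED_REPLACE pushes the current RED
 * parameters down, TC_RED_DESTROY removes the hardware state.
 */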
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.limit = q->limit;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.is_harddrop = red_use_harddrop(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P]	= { .type = NLA_U32 },
};

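/*
 * Parse and apply a new configuration from netlink. The packet queue
 * itself is a bfifo child qdisc sized to "limit"; when the limit
 * changes, a fresh child is created outside the tree lock, swapped in
 * under it, and the old child is released after the lock is dropped.
 */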
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct Qdisc *old_child = NULL, *child = NULL;
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
					  NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_flush_backlog(q->qdisc);
		old_child = q->qdisc;
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);

	red_offload(sch, true);

	if (old_child)
		qdisc_put(old_child);
	return 0;
}

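/*
 * Adaptive RED: re-run the max_P adaptation roughly every 500ms under
 * the root qdisc lock, so the marking probability tracks the observed
 * average queue length.
 */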
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

static int red_dump_offload_stats(struct Qdisc *sch)
{
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};

	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

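/*
 * RED is classful only in a degenerate sense: it exposes exactly one
 * pseudo-class (minor 1) so that the child qdisc holding the actual
 * packet queue can be inspected, walked, and replaced from userspace.
 */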
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static void red_graft_offload(struct Qdisc *sch,
			      struct Qdisc *new, struct Qdisc *old,
			      struct netlink_ext_ack *extack)
{
	struct tc_red_qopt_offload graft_offload = {
		.handle		= sch->handle,
		.parent		= sch->parent,
		.child_handle	= new->handle,
		.command	= TC_RED_GRAFT,
	};

	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
				   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);

	red_graft_offload(sch, new, *old, extack);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

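/*
 * Typical userspace configuration (iproute2; the values below are
 * illustrative, not a recommendation):
 *
 *   tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *      avpkt 1000 burst 55 ecn adaptive bandwidth 10Mbit
 *
 * tc derives Wlog/Plog/Scell_log and the STAB lookup table from these
 * values and passes them down via TCA_RED_PARMS/TCA_RED_STAB.
 */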
static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");