Merge tag 'block-5.7-2020-04-10' of git://git.kernel.dk/linux-block
[linux-block.git] / net / sched / sch_red.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * net/sched/sch_red.c  Random Early Detection queue.
4  *
5  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6  *
7  * Changes:
8  * J Hadi Salim 980914: computation fixes
9  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
10  * J Hadi Salim 980816:  ECN support
11  */
12
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/skbuff.h>
17 #include <net/pkt_sched.h>
18 #include <net/pkt_cls.h>
19 #include <net/inet_ecn.h>
20 #include <net/red.h>
21
22
23 /*      Parameters, settable by user:
24         -----------------------------
25
26         limit           - bytes (must be > qth_max + burst)
27
28         Hard limit on queue length, should be chosen >qth_max
29         to allow packet bursts. This parameter does not
30         affect the algorithm's behaviour and can be chosen
31         arbitrarily high (well, less than ram size)
32         Really, this limit will never be reached
33         if RED works correctly.
34  */
35
/* Private per-instance state of a RED qdisc (obtained via qdisc_priv()). */
struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */

        /* TC_RED_* behaviour bits tested by the red_use_*() helpers. */
        unsigned char           flags;
        /* Non-flags in tc_red_qopt.flags. */
        unsigned char           userbits;

        /* Periodic timer driving red_adaptative_algo(); re-armed every HZ/2. */
        struct timer_list       adapt_timer;
        /* Back-pointer to the owning qdisc, set in red_init(). */
        struct Qdisc            *sch;
        struct red_parms        parms;
        struct red_vars         vars;
        /* Drop/mark counters, exported by red_dump_stats(). */
        struct red_stats        stats;
        /* Child qdisc that actually stores the packets. */
        struct Qdisc            *qdisc;
};
50
/*
 * Flag bits accepted and reported through TCA_RED_FLAGS; used as the
 * bitfield32 validation mask in red_policy and as the selector in red_dump().
 */
static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP;
52
53 static inline int red_use_ecn(struct red_sched_data *q)
54 {
55         return q->flags & TC_RED_ECN;
56 }
57
58 static inline int red_use_harddrop(struct red_sched_data *q)
59 {
60         return q->flags & TC_RED_HARDDROP;
61 }
62
63 static int red_use_nodrop(struct red_sched_data *q)
64 {
65         return q->flags & TC_RED_NODROP;
66 }
67
68 static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
69                        struct sk_buff **to_free)
70 {
71         struct red_sched_data *q = qdisc_priv(sch);
72         struct Qdisc *child = q->qdisc;
73         int ret;
74
75         q->vars.qavg = red_calc_qavg(&q->parms,
76                                      &q->vars,
77                                      child->qstats.backlog);
78
79         if (red_is_idling(&q->vars))
80                 red_end_of_idle_period(&q->vars);
81
82         switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
83         case RED_DONT_MARK:
84                 break;
85
86         case RED_PROB_MARK:
87                 qdisc_qstats_overlimit(sch);
88                 if (!red_use_ecn(q)) {
89                         q->stats.prob_drop++;
90                         goto congestion_drop;
91                 }
92
93                 if (INET_ECN_set_ce(skb)) {
94                         q->stats.prob_mark++;
95                 } else if (!red_use_nodrop(q)) {
96                         q->stats.prob_drop++;
97                         goto congestion_drop;
98                 }
99
100                 /* Non-ECT packet in ECN nodrop mode: queue it. */
101                 break;
102
103         case RED_HARD_MARK:
104                 qdisc_qstats_overlimit(sch);
105                 if (red_use_harddrop(q) || !red_use_ecn(q)) {
106                         q->stats.forced_drop++;
107                         goto congestion_drop;
108                 }
109
110                 if (INET_ECN_set_ce(skb)) {
111                         q->stats.forced_mark++;
112                 } else if (!red_use_nodrop(q)) {
113                         q->stats.forced_drop++;
114                         goto congestion_drop;
115                 }
116
117                 /* Non-ECT packet in ECN nodrop mode: queue it. */
118                 break;
119         }
120
121         ret = qdisc_enqueue(skb, child, to_free);
122         if (likely(ret == NET_XMIT_SUCCESS)) {
123                 qdisc_qstats_backlog_inc(sch, skb);
124                 sch->q.qlen++;
125         } else if (net_xmit_drop_count(ret)) {
126                 q->stats.pdrop++;
127                 qdisc_qstats_drop(sch);
128         }
129         return ret;
130
131 congestion_drop:
132         qdisc_drop(skb, sch, to_free);
133         return NET_XMIT_CN;
134 }
135
136 static struct sk_buff *red_dequeue(struct Qdisc *sch)
137 {
138         struct sk_buff *skb;
139         struct red_sched_data *q = qdisc_priv(sch);
140         struct Qdisc *child = q->qdisc;
141
142         skb = child->dequeue(child);
143         if (skb) {
144                 qdisc_bstats_update(sch, skb);
145                 qdisc_qstats_backlog_dec(sch, skb);
146                 sch->q.qlen--;
147         } else {
148                 if (!red_is_idling(&q->vars))
149                         red_start_of_idle_period(&q->vars);
150         }
151         return skb;
152 }
153
154 static struct sk_buff *red_peek(struct Qdisc *sch)
155 {
156         struct red_sched_data *q = qdisc_priv(sch);
157         struct Qdisc *child = q->qdisc;
158
159         return child->ops->peek(child);
160 }
161
162 static void red_reset(struct Qdisc *sch)
163 {
164         struct red_sched_data *q = qdisc_priv(sch);
165
166         qdisc_reset(q->qdisc);
167         sch->qstats.backlog = 0;
168         sch->q.qlen = 0;
169         red_restart(&q->vars);
170 }
171
172 static int red_offload(struct Qdisc *sch, bool enable)
173 {
174         struct red_sched_data *q = qdisc_priv(sch);
175         struct net_device *dev = qdisc_dev(sch);
176         struct tc_red_qopt_offload opt = {
177                 .handle = sch->handle,
178                 .parent = sch->parent,
179         };
180
181         if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
182                 return -EOPNOTSUPP;
183
184         if (enable) {
185                 opt.command = TC_RED_REPLACE;
186                 opt.set.min = q->parms.qth_min >> q->parms.Wlog;
187                 opt.set.max = q->parms.qth_max >> q->parms.Wlog;
188                 opt.set.probability = q->parms.max_P;
189                 opt.set.limit = q->limit;
190                 opt.set.is_ecn = red_use_ecn(q);
191                 opt.set.is_harddrop = red_use_harddrop(q);
192                 opt.set.is_nodrop = red_use_nodrop(q);
193                 opt.set.qstats = &sch->qstats;
194         } else {
195                 opt.command = TC_RED_DESTROY;
196         }
197
198         return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
199 }
200
201 static void red_destroy(struct Qdisc *sch)
202 {
203         struct red_sched_data *q = qdisc_priv(sch);
204
205         del_timer_sync(&q->adapt_timer);
206         red_offload(sch, false);
207         qdisc_put(q->qdisc);
208 }
209
/*
 * Netlink attribute policy for the nested TCA_OPTIONS of a RED qdisc,
 * consumed by the attribute parser in red_change().
 */
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
        /* Strict validation starts at TCA_RED_FLAGS. */
        [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_RED_MAX_P] = { .type = NLA_U32 },
        /* Only bits in red_supported_flags may be selected. */
        [TCA_RED_FLAGS] = { .type = NLA_BITFIELD32,
                            .validation_data = &red_supported_flags },
};
218
219 static int red_change(struct Qdisc *sch, struct nlattr *opt,
220                       struct netlink_ext_ack *extack)
221 {
222         struct Qdisc *old_child = NULL, *child = NULL;
223         struct red_sched_data *q = qdisc_priv(sch);
224         struct nlattr *tb[TCA_RED_MAX + 1];
225         struct nla_bitfield32 flags_bf;
226         struct tc_red_qopt *ctl;
227         unsigned char userbits;
228         unsigned char flags;
229         int err;
230         u32 max_P;
231
232         if (opt == NULL)
233                 return -EINVAL;
234
235         err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
236                                           NULL);
237         if (err < 0)
238                 return err;
239
240         if (tb[TCA_RED_PARMS] == NULL ||
241             tb[TCA_RED_STAB] == NULL)
242                 return -EINVAL;
243
244         max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
245
246         ctl = nla_data(tb[TCA_RED_PARMS]);
247         if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
248                 return -EINVAL;
249
250         err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
251                             tb[TCA_RED_FLAGS], red_supported_flags,
252                             &flags_bf, &userbits, extack);
253         if (err)
254                 return err;
255
256         if (ctl->limit > 0) {
257                 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
258                                          extack);
259                 if (IS_ERR(child))
260                         return PTR_ERR(child);
261
262                 /* child is fifo, no need to check for noop_qdisc */
263                 qdisc_hash_add(child, true);
264         }
265
266         sch_tree_lock(sch);
267
268         flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
269         err = red_validate_flags(flags, extack);
270         if (err)
271                 goto unlock_out;
272
273         q->flags = flags;
274         q->userbits = userbits;
275         q->limit = ctl->limit;
276         if (child) {
277                 qdisc_tree_flush_backlog(q->qdisc);
278                 old_child = q->qdisc;
279                 q->qdisc = child;
280         }
281
282         red_set_parms(&q->parms,
283                       ctl->qth_min, ctl->qth_max, ctl->Wlog,
284                       ctl->Plog, ctl->Scell_log,
285                       nla_data(tb[TCA_RED_STAB]),
286                       max_P);
287         red_set_vars(&q->vars);
288
289         del_timer(&q->adapt_timer);
290         if (ctl->flags & TC_RED_ADAPTATIVE)
291                 mod_timer(&q->adapt_timer, jiffies + HZ/2);
292
293         if (!q->qdisc->q.qlen)
294                 red_start_of_idle_period(&q->vars);
295
296         sch_tree_unlock(sch);
297
298         red_offload(sch, true);
299
300         if (old_child)
301                 qdisc_put(old_child);
302         return 0;
303
304 unlock_out:
305         sch_tree_unlock(sch);
306         if (child)
307                 qdisc_put(child);
308         return err;
309 }
310
311 static inline void red_adaptative_timer(struct timer_list *t)
312 {
313         struct red_sched_data *q = from_timer(q, t, adapt_timer);
314         struct Qdisc *sch = q->sch;
315         spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
316
317         spin_lock(root_lock);
318         red_adaptative_algo(&q->parms, &q->vars);
319         mod_timer(&q->adapt_timer, jiffies + HZ/2);
320         spin_unlock(root_lock);
321 }
322
323 static int red_init(struct Qdisc *sch, struct nlattr *opt,
324                     struct netlink_ext_ack *extack)
325 {
326         struct red_sched_data *q = qdisc_priv(sch);
327
328         q->qdisc = &noop_qdisc;
329         q->sch = sch;
330         timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
331         return red_change(sch, opt, extack);
332 }
333
334 static int red_dump_offload_stats(struct Qdisc *sch)
335 {
336         struct tc_red_qopt_offload hw_stats = {
337                 .command = TC_RED_STATS,
338                 .handle = sch->handle,
339                 .parent = sch->parent,
340                 {
341                         .stats.bstats = &sch->bstats,
342                         .stats.qstats = &sch->qstats,
343                 },
344         };
345
346         return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
347 }
348
349 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
350 {
351         struct red_sched_data *q = qdisc_priv(sch);
352         struct nlattr *opts = NULL;
353         struct tc_red_qopt opt = {
354                 .limit          = q->limit,
355                 .flags          = (q->flags & TC_RED_HISTORIC_FLAGS) |
356                                   q->userbits,
357                 .qth_min        = q->parms.qth_min >> q->parms.Wlog,
358                 .qth_max        = q->parms.qth_max >> q->parms.Wlog,
359                 .Wlog           = q->parms.Wlog,
360                 .Plog           = q->parms.Plog,
361                 .Scell_log      = q->parms.Scell_log,
362         };
363         int err;
364
365         err = red_dump_offload_stats(sch);
366         if (err)
367                 goto nla_put_failure;
368
369         opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
370         if (opts == NULL)
371                 goto nla_put_failure;
372         if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
373             nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
374             nla_put_bitfield32(skb, TCA_RED_FLAGS,
375                                q->flags, red_supported_flags))
376                 goto nla_put_failure;
377         return nla_nest_end(skb, opts);
378
379 nla_put_failure:
380         nla_nest_cancel(skb, opts);
381         return -EMSGSIZE;
382 }
383
384 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
385 {
386         struct red_sched_data *q = qdisc_priv(sch);
387         struct net_device *dev = qdisc_dev(sch);
388         struct tc_red_xstats st = {0};
389
390         if (sch->flags & TCQ_F_OFFLOADED) {
391                 struct tc_red_qopt_offload hw_stats_request = {
392                         .command = TC_RED_XSTATS,
393                         .handle = sch->handle,
394                         .parent = sch->parent,
395                         {
396                                 .xstats = &q->stats,
397                         },
398                 };
399                 dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
400                                               &hw_stats_request);
401         }
402         st.early = q->stats.prob_drop + q->stats.forced_drop;
403         st.pdrop = q->stats.pdrop;
404         st.other = q->stats.other;
405         st.marked = q->stats.prob_mark + q->stats.forced_mark;
406
407         return gnet_stats_copy_app(d, &st, sizeof(st));
408 }
409
410 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
411                           struct sk_buff *skb, struct tcmsg *tcm)
412 {
413         struct red_sched_data *q = qdisc_priv(sch);
414
415         tcm->tcm_handle |= TC_H_MIN(1);
416         tcm->tcm_info = q->qdisc->handle;
417         return 0;
418 }
419
420 static void red_graft_offload(struct Qdisc *sch,
421                               struct Qdisc *new, struct Qdisc *old,
422                               struct netlink_ext_ack *extack)
423 {
424         struct tc_red_qopt_offload graft_offload = {
425                 .handle         = sch->handle,
426                 .parent         = sch->parent,
427                 .child_handle   = new->handle,
428                 .command        = TC_RED_GRAFT,
429         };
430
431         qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
432                                    TC_SETUP_QDISC_RED, &graft_offload, extack);
433 }
434
435 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
436                      struct Qdisc **old, struct netlink_ext_ack *extack)
437 {
438         struct red_sched_data *q = qdisc_priv(sch);
439
440         if (new == NULL)
441                 new = &noop_qdisc;
442
443         *old = qdisc_replace(sch, new, &q->qdisc);
444
445         red_graft_offload(sch, new, *old, extack);
446         return 0;
447 }
448
449 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
450 {
451         struct red_sched_data *q = qdisc_priv(sch);
452         return q->qdisc;
453 }
454
455 static unsigned long red_find(struct Qdisc *sch, u32 classid)
456 {
457         return 1;
458 }
459
460 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
461 {
462         if (!walker->stop) {
463                 if (walker->count >= walker->skip)
464                         if (walker->fn(sch, 1, walker) < 0) {
465                                 walker->stop = 1;
466                                 return;
467                         }
468                 walker->count++;
469         }
470 }
471
/* Class operations exposing the child qdisc of RED as a single class. */
static const struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .find           =       red_find,
        .walk           =       red_walk,
        .dump           =       red_dump_class,
};
479
/* Qdisc operations table for "red", registered in red_module_init(). */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .peek           =       red_peek,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};
495
496 static int __init red_module_init(void)
497 {
498         return register_qdisc(&red_qdisc_ops);
499 }
500
501 static void __exit red_module_exit(void)
502 {
503         unregister_qdisc(&red_qdisc_ops);
504 }
505
/* Hook the init/exit handlers into the module loader and declare the license. */
module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");