/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithm's behaviour and can be chosen
	arbitrarily high (well, less than ram size).
	Really, this limit will never be reached
	if RED works correctly.
 */
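
/* Illustrative userspace configuration (iproute2 syntax, shown as a
 * sketch; exact option spelling depends on the tc version):
 *
 *   tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *      avpkt 1000 burst 55 ecn adaptive bandwidth 10Mbit
 *
 * Note limit (400000) is well above max (90000) to absorb bursts,
 * per the note above.
 */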

struct red_sched_data {
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;
	struct timer_list	adapt_timer;
	struct Qdisc		*sch;
	struct red_parms	parms;
	struct red_vars		vars;
	struct red_stats	stats;
	struct Qdisc		*qdisc;
};

static inline int red_use_ecn(struct red_sched_data *q)
{
	return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
	return q->flags & TC_RED_HARDDROP;
}

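/* Enqueue path: update the EWMA queue average from the child backlog,
 * let red_action() decide, then ECN-mark or drop on (probabilistic or
 * hard) congestion before handing the packet to the child fifo.
 */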
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
		       struct sk_buff **to_free)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;
	int ret;

	q->vars.qavg = red_calc_qavg(&q->parms,
				     &q->vars,
				     child->qstats.backlog);

	if (red_is_idling(&q->vars))
		red_end_of_idle_period(&q->vars);

	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		qdisc_qstats_overlimit(sch);
		if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
			q->stats.prob_drop++;
			goto congestion_drop;
		}

		q->stats.prob_mark++;
		break;

	case RED_HARD_MARK:
		qdisc_qstats_overlimit(sch);
		if (red_use_harddrop(q) || !red_use_ecn(q) ||
		    !INET_ECN_set_ce(skb)) {
			q->stats.forced_drop++;
			goto congestion_drop;
		}

		q->stats.forced_mark++;
		break;
	}

	ret = qdisc_enqueue(skb, child, to_free);
	if (likely(ret == NET_XMIT_SUCCESS)) {
		qdisc_qstats_backlog_inc(sch, skb);
		sch->q.qlen++;
	} else if (net_xmit_drop_count(ret)) {
		q->stats.pdrop++;
		qdisc_qstats_drop(sch);
	}
	return ret;

congestion_drop:
	qdisc_drop(skb, sch, to_free);
	return NET_XMIT_CN;
}

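/* Dequeue from the child qdisc; when it goes empty, enter the idle
 * period so the average queue estimate decays while the link is unused.
 */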
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
	struct sk_buff *skb;
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	skb = child->dequeue(child);
	if (skb) {
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;
	} else {
		if (!red_is_idling(&q->vars))
			red_start_of_idle_period(&q->vars);
	}
	return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct Qdisc *child = q->qdisc;

	return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->qstats.backlog = 0;
	sch->q.qlen = 0;
	red_restart(&q->vars);
}

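/* Install or remove the RED configuration in hardware through
 * ndo_setup_tc(TC_SETUP_QDISC_RED); returns -EOPNOTSUPP when the
 * device cannot offload the qdisc.
 */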
static int red_offload(struct Qdisc *sch, bool enable)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload opt = {
		.handle = sch->handle,
		.parent = sch->parent,
	};

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	if (enable) {
		opt.command = TC_RED_REPLACE;
		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
		opt.set.probability = q->parms.max_P;
		opt.set.is_ecn = red_use_ecn(q);
		opt.set.qstats = &sch->qstats;
	} else {
		opt.command = TC_RED_DESTROY;
	}

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
	struct red_sched_data *q = qdisc_priv(sch);

	del_timer_sync(&q->adapt_timer);
	red_offload(sch, false);
	qdisc_destroy(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
	[TCA_RED_MAX_P] = { .type = NLA_U32 },
};

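/* Parse and validate the netlink attributes, then apply the new
 * parameters under the tree lock, replacing the child bfifo when a
 * queue limit is given.
 */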
static int red_change(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;
	u32 max_P;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy, NULL);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

	ctl = nla_data(tb[TCA_RED_PARMS]);
	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
		return -EINVAL;

	if (ctl->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
					 extack);
		if (IS_ERR(child))
			return PTR_ERR(child);

		/* child is fifo, no need to check for noop_qdisc */
		qdisc_hash_add(child, true);
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
					  q->qdisc->qstats.backlog);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
		      ctl->Plog, ctl->Scell_log,
		      nla_data(tb[TCA_RED_STAB]),
		      max_P);
	red_set_vars(&q->vars);

	del_timer(&q->adapt_timer);
	if (ctl->flags & TC_RED_ADAPTATIVE)
		mod_timer(&q->adapt_timer, jiffies + HZ/2);

	if (!q->qdisc->q.qlen)
		red_start_of_idle_period(&q->vars);

	sch_tree_unlock(sch);
	red_offload(sch, true);
	return 0;
}

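/* Adaptive RED: every 500ms re-tune max_P from the observed average
 * queue length, under the root qdisc lock.
 */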
static inline void red_adaptative_timer(struct timer_list *t)
{
	struct red_sched_data *q = from_timer(q, t, adapt_timer);
	struct Qdisc *sch = q->sch;
	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

	spin_lock(root_lock);
	red_adaptative_algo(&q->parms, &q->vars);
	mod_timer(&q->adapt_timer, jiffies + HZ/2);
	spin_unlock(root_lock);
}

static int red_init(struct Qdisc *sch, struct nlattr *opt,
		    struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	q->qdisc = &noop_qdisc;
	q->sch = sch;
	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
	return red_change(sch, opt, extack);
}

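/* Refresh bstats/qstats from the device and track in TCQ_F_OFFLOADED
 * whether the qdisc is actually offloaded right now.
 */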
static int red_dump_offload_stats(struct Qdisc *sch, struct tc_red_qopt *opt)
{
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_qopt_offload hw_stats = {
		.command = TC_RED_STATS,
		.handle = sch->handle,
		.parent = sch->parent,
		{
			.stats.bstats = &sch->bstats,
			.stats.qstats = &sch->qstats,
		},
	};
	int err;

	sch->flags &= ~TCQ_F_OFFLOADED;

	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
		return 0;

	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					    &hw_stats);
	if (err == -EOPNOTSUPP)
		return 0;

	if (!err)
		sch->flags |= TCQ_F_OFFLOADED;

	return err;
}

static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *opts = NULL;
	struct tc_red_qopt opt = {
		.limit		= q->limit,
		.flags		= q->flags,
		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
		.Wlog		= q->parms.Wlog,
		.Plog		= q->parms.Plog,
		.Scell_log	= q->parms.Scell_log,
	};
	int err;

	err = red_dump_offload_stats(sch, &opt);
	if (err)
		goto nla_put_failure;

	opts = nla_nest_start(skb, TCA_OPTIONS);
	if (opts == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
		goto nla_put_failure;
	return nla_nest_end(skb, opts);

nla_put_failure:
	nla_nest_cancel(skb, opts);
	return -EMSGSIZE;
}

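/* Export tc_red_xstats to userspace; when offloaded, pull the xstats
 * counters from the device first.
 */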
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct tc_red_xstats st = {0};

	if (sch->flags & TCQ_F_OFFLOADED) {
		struct tc_red_qopt_offload hw_stats_request = {
			.command = TC_RED_XSTATS,
			.handle = sch->handle,
			.parent = sch->parent,
			{
				.xstats = &q->stats,
			},
		};
		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
					      &hw_stats_request);
	}
	st.early = q->stats.prob_drop + q->stats.forced_drop;
	st.pdrop = q->stats.pdrop;
	st.other = q->stats.other;
	st.marked = q->stats.prob_mark + q->stats.forced_mark;

	return gnet_stats_copy_app(d, &st, sizeof(st));
}

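/* RED is classful with a single child qdisc; the class ops below expose
 * it as minor 1 for grafting, walking and dumping.
 */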
static int red_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct red_sched_data *q = qdisc_priv(sch);

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;
	return 0;
}

static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct red_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct red_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.find		=	red_find,
	.walk		=	red_walk,
	.dump		=	red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.peek		=	red_peek,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};

static int __init red_module_init(void)
{
	return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
	unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");