Commit | Line | Data |
---|---|---|
585d763a VCG |
1 | /* |
2 | * net/sched/sch_cbs.c Credit Based Shaper | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> | |
10 | * | |
11 | */ | |
12 | ||
13 | /* Credit Based Shaper (CBS) | |
14 | * ========================= | |
15 | * | |
16 | * This is a simple rate-limiting shaper aimed at TSN applications on | |
17 | * systems with known traffic workloads. | |
18 | * | |
19 | * Its algorithm is defined by the IEEE 802.1Q-2014 Specification, | |
20 | * Section 8.6.8.2, and explained in more detail in the Annex L of the | |
21 | * same specification. | |
22 | * | |
23 | * There are four tunables to be considered: | |
24 | * | |
25 | * 'idleslope': Idleslope is the rate of credits that is | |
26 | * accumulated (in kilobits per second) when there is at least | |
27 | * one packet waiting for transmission. Packets are transmitted | |
28 | * when the current value of credits is equal or greater than | |
29 | * zero. When there is no packet to be transmitted the amount of | |
30 | * credits is set to zero. This is the main tunable of the CBS | |
31 | * algorithm. | |
32 | * | |
33 | * 'sendslope': | |
34 | * Sendslope is the rate of credits that is depleted (it should be a | |
35 | * negative number of kilobits per second) when a transmission is | |
36 | * occurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section | |
37 | * 8.6.8.2 item g): | |
38 | * | |
39 | * sendslope = idleslope - port_transmit_rate | |
40 | * | |
41 | * 'hicredit': Hicredit defines the maximum amount of credits (in | |
42 | * bytes) that can be accumulated. Hicredit depends on the | |
43 | * characteristics of interfering traffic, | |
44 | * 'max_interference_size' is the maximum size of any burst of | |
45 | * traffic that can delay the transmission of a frame that is | |
46 | * available for transmission for this traffic class, (IEEE | |
47 | * 802.1Q-2014 Annex L, Equation L-3): | |
48 | * | |
49 | * hicredit = max_interference_size * (idleslope / port_transmit_rate) | |
50 | * | |
51 | * 'locredit': Locredit is the minimum amount of credits that can | |
52 | * be reached. It is a function of the traffic flowing through | |
53 | * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2): | |
54 | * | |
55 | * locredit = max_frame_size * (sendslope / port_transmit_rate) | |
56 | */ | |
57 | ||
58 | #include <linux/module.h> | |
59 | #include <linux/types.h> | |
60 | #include <linux/kernel.h> | |
61 | #include <linux/string.h> | |
62 | #include <linux/errno.h> | |
63 | #include <linux/skbuff.h> | |
64 | #include <net/netlink.h> | |
65 | #include <net/sch_generic.h> | |
66 | #include <net/pkt_sched.h> | |
67 | ||
/* Bytes contained in one kilobit (1000 bits / 8 bits per byte). The LL
 * suffix makes the constant a long long so slope conversions that use it
 * are carried out in a wide integer type.
 */
#define BYTES_PER_KBIT (1000LL / 8)
69 | ||
70 | struct cbs_sched_data { | |
3d0bd028 VCG |
71 | bool offload; |
72 | int queue; | |
585d763a VCG |
73 | s64 port_rate; /* in bytes/s */ |
74 | s64 last; /* timestamp in ns */ | |
75 | s64 credits; /* in bytes */ | |
76 | s32 locredit; /* in bytes */ | |
77 | s32 hicredit; /* in bytes */ | |
78 | s64 sendslope; /* in bytes/s */ | |
79 | s64 idleslope; /* in bytes/s */ | |
80 | struct qdisc_watchdog watchdog; | |
990e35ec VCG |
81 | int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, |
82 | struct sk_buff **to_free); | |
585d763a | 83 | struct sk_buff *(*dequeue)(struct Qdisc *sch); |
990e35ec | 84 | struct Qdisc *qdisc; |
585d763a VCG |
85 | }; |
86 | ||
990e35ec VCG |
87 | static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, |
88 | struct Qdisc *child, | |
89 | struct sk_buff **to_free) | |
3d0bd028 | 90 | { |
990e35ec VCG |
91 | int err; |
92 | ||
93 | err = child->ops->enqueue(skb, child, to_free); | |
94 | if (err != NET_XMIT_SUCCESS) | |
95 | return err; | |
96 | ||
97 | qdisc_qstats_backlog_inc(sch, skb); | |
98 | sch->q.qlen++; | |
99 | ||
100 | return NET_XMIT_SUCCESS; | |
3d0bd028 VCG |
101 | } |
102 | ||
990e35ec VCG |
103 | static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, |
104 | struct sk_buff **to_free) | |
585d763a VCG |
105 | { |
106 | struct cbs_sched_data *q = qdisc_priv(sch); | |
990e35ec VCG |
107 | struct Qdisc *qdisc = q->qdisc; |
108 | ||
109 | return cbs_child_enqueue(skb, sch, qdisc, to_free); | |
110 | } | |
111 | ||
112 | static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, | |
113 | struct sk_buff **to_free) | |
114 | { | |
115 | struct cbs_sched_data *q = qdisc_priv(sch); | |
116 | struct Qdisc *qdisc = q->qdisc; | |
585d763a VCG |
117 | |
118 | if (sch->q.qlen == 0 && q->credits > 0) { | |
119 | /* We need to stop accumulating credits when there's | |
120 | * no enqueued packets and q->credits is positive. | |
121 | */ | |
122 | q->credits = 0; | |
123 | q->last = ktime_get_ns(); | |
124 | } | |
125 | ||
990e35ec | 126 | return cbs_child_enqueue(skb, sch, qdisc, to_free); |
585d763a VCG |
127 | } |
128 | ||
129 | static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, | |
130 | struct sk_buff **to_free) | |
131 | { | |
132 | struct cbs_sched_data *q = qdisc_priv(sch); | |
133 | ||
990e35ec | 134 | return q->enqueue(skb, sch, to_free); |
585d763a VCG |
135 | } |
136 | ||
137 | /* timediff is in ns, slope is in bytes/s */ | |
138 | static s64 timediff_to_credits(s64 timediff, s64 slope) | |
139 | { | |
140 | return div64_s64(timediff * slope, NSEC_PER_SEC); | |
141 | } | |
142 | ||
143 | static s64 delay_from_credits(s64 credits, s64 slope) | |
144 | { | |
145 | if (unlikely(slope == 0)) | |
146 | return S64_MAX; | |
147 | ||
148 | return div64_s64(-credits * NSEC_PER_SEC, slope); | |
149 | } | |
150 | ||
151 | static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate) | |
152 | { | |
153 | if (unlikely(port_rate == 0)) | |
154 | return S64_MAX; | |
155 | ||
156 | return div64_s64(len * slope, port_rate); | |
157 | } | |
158 | ||
990e35ec VCG |
159 | static struct sk_buff *cbs_child_dequeue(struct Qdisc *sch, struct Qdisc *child) |
160 | { | |
161 | struct sk_buff *skb; | |
162 | ||
163 | skb = child->ops->dequeue(child); | |
164 | if (!skb) | |
165 | return NULL; | |
166 | ||
167 | qdisc_qstats_backlog_dec(sch, skb); | |
168 | qdisc_bstats_update(sch, skb); | |
169 | sch->q.qlen--; | |
170 | ||
171 | return skb; | |
172 | } | |
173 | ||
585d763a VCG |
174 | static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) |
175 | { | |
176 | struct cbs_sched_data *q = qdisc_priv(sch); | |
990e35ec | 177 | struct Qdisc *qdisc = q->qdisc; |
585d763a VCG |
178 | s64 now = ktime_get_ns(); |
179 | struct sk_buff *skb; | |
180 | s64 credits; | |
181 | int len; | |
182 | ||
183 | if (q->credits < 0) { | |
184 | credits = timediff_to_credits(now - q->last, q->idleslope); | |
185 | ||
186 | credits = q->credits + credits; | |
187 | q->credits = min_t(s64, credits, q->hicredit); | |
188 | ||
189 | if (q->credits < 0) { | |
190 | s64 delay; | |
191 | ||
192 | delay = delay_from_credits(q->credits, q->idleslope); | |
193 | qdisc_watchdog_schedule_ns(&q->watchdog, now + delay); | |
194 | ||
195 | q->last = now; | |
196 | ||
197 | return NULL; | |
198 | } | |
199 | } | |
990e35ec | 200 | skb = cbs_child_dequeue(sch, qdisc); |
585d763a VCG |
201 | if (!skb) |
202 | return NULL; | |
203 | ||
204 | len = qdisc_pkt_len(skb); | |
205 | ||
206 | /* As sendslope is a negative number, this will decrease the | |
207 | * amount of q->credits. | |
208 | */ | |
209 | credits = credits_from_len(len, q->sendslope, q->port_rate); | |
210 | credits += q->credits; | |
211 | ||
212 | q->credits = max_t(s64, credits, q->locredit); | |
213 | q->last = now; | |
214 | ||
215 | return skb; | |
216 | } | |
217 | ||
3d0bd028 VCG |
218 | static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch) |
219 | { | |
990e35ec VCG |
220 | struct cbs_sched_data *q = qdisc_priv(sch); |
221 | struct Qdisc *qdisc = q->qdisc; | |
222 | ||
223 | return cbs_child_dequeue(sch, qdisc); | |
3d0bd028 VCG |
224 | } |
225 | ||
585d763a VCG |
226 | static struct sk_buff *cbs_dequeue(struct Qdisc *sch) |
227 | { | |
228 | struct cbs_sched_data *q = qdisc_priv(sch); | |
229 | ||
230 | return q->dequeue(sch); | |
231 | } | |
232 | ||
233 | static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { | |
234 | [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, | |
235 | }; | |
236 | ||
3d0bd028 VCG |
237 | static void cbs_disable_offload(struct net_device *dev, |
238 | struct cbs_sched_data *q) | |
239 | { | |
240 | struct tc_cbs_qopt_offload cbs = { }; | |
241 | const struct net_device_ops *ops; | |
242 | int err; | |
243 | ||
244 | if (!q->offload) | |
245 | return; | |
246 | ||
247 | q->enqueue = cbs_enqueue_soft; | |
248 | q->dequeue = cbs_dequeue_soft; | |
249 | ||
250 | ops = dev->netdev_ops; | |
251 | if (!ops->ndo_setup_tc) | |
252 | return; | |
253 | ||
254 | cbs.queue = q->queue; | |
255 | cbs.enable = 0; | |
256 | ||
8521db4c | 257 | err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); |
3d0bd028 VCG |
258 | if (err < 0) |
259 | pr_warn("Couldn't disable CBS offload for queue %d\n", | |
260 | cbs.queue); | |
261 | } | |
262 | ||
263 | static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, | |
710fb396 AA |
264 | const struct tc_cbs_qopt *opt, |
265 | struct netlink_ext_ack *extack) | |
3d0bd028 VCG |
266 | { |
267 | const struct net_device_ops *ops = dev->netdev_ops; | |
268 | struct tc_cbs_qopt_offload cbs = { }; | |
269 | int err; | |
270 | ||
710fb396 AA |
271 | if (!ops->ndo_setup_tc) { |
272 | NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload"); | |
3d0bd028 | 273 | return -EOPNOTSUPP; |
710fb396 | 274 | } |
3d0bd028 VCG |
275 | |
276 | cbs.queue = q->queue; | |
277 | ||
278 | cbs.enable = 1; | |
279 | cbs.hicredit = opt->hicredit; | |
280 | cbs.locredit = opt->locredit; | |
281 | cbs.idleslope = opt->idleslope; | |
282 | cbs.sendslope = opt->sendslope; | |
283 | ||
8521db4c | 284 | err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); |
710fb396 AA |
285 | if (err < 0) { |
286 | NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload"); | |
3d0bd028 | 287 | return err; |
710fb396 | 288 | } |
3d0bd028 VCG |
289 | |
290 | q->enqueue = cbs_enqueue_offload; | |
291 | q->dequeue = cbs_dequeue_offload; | |
292 | ||
293 | return 0; | |
294 | } | |
295 | ||
2030721c AA |
296 | static int cbs_change(struct Qdisc *sch, struct nlattr *opt, |
297 | struct netlink_ext_ack *extack) | |
585d763a VCG |
298 | { |
299 | struct cbs_sched_data *q = qdisc_priv(sch); | |
300 | struct net_device *dev = qdisc_dev(sch); | |
301 | struct nlattr *tb[TCA_CBS_MAX + 1]; | |
585d763a | 302 | struct tc_cbs_qopt *qopt; |
585d763a VCG |
303 | int err; |
304 | ||
710fb396 | 305 | err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack); |
585d763a VCG |
306 | if (err < 0) |
307 | return err; | |
308 | ||
710fb396 AA |
309 | if (!tb[TCA_CBS_PARMS]) { |
310 | NL_SET_ERR_MSG(extack, "Missing CBS parameter which are mandatory"); | |
585d763a | 311 | return -EINVAL; |
710fb396 | 312 | } |
585d763a VCG |
313 | |
314 | qopt = nla_data(tb[TCA_CBS_PARMS]); | |
315 | ||
3d0bd028 VCG |
316 | if (!qopt->offload) { |
317 | struct ethtool_link_ksettings ecmd; | |
318 | s64 link_speed; | |
585d763a | 319 | |
3d0bd028 VCG |
320 | if (!__ethtool_get_link_ksettings(dev, &ecmd)) |
321 | link_speed = ecmd.base.speed; | |
322 | else | |
323 | link_speed = SPEED_1000; | |
585d763a | 324 | |
3d0bd028 | 325 | q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; |
585d763a | 326 | |
3d0bd028 VCG |
327 | cbs_disable_offload(dev, q); |
328 | } else { | |
710fb396 | 329 | err = cbs_enable_offload(dev, q, qopt, extack); |
3d0bd028 VCG |
330 | if (err < 0) |
331 | return err; | |
332 | } | |
585d763a | 333 | |
3d0bd028 | 334 | /* Everything went OK, save the parameters used. */ |
585d763a VCG |
335 | q->hicredit = qopt->hicredit; |
336 | q->locredit = qopt->locredit; | |
337 | q->idleslope = qopt->idleslope * BYTES_PER_KBIT; | |
338 | q->sendslope = qopt->sendslope * BYTES_PER_KBIT; | |
3d0bd028 | 339 | q->offload = qopt->offload; |
585d763a VCG |
340 | |
341 | return 0; | |
342 | } | |
343 | ||
e63d7dfd AA |
344 | static int cbs_init(struct Qdisc *sch, struct nlattr *opt, |
345 | struct netlink_ext_ack *extack) | |
585d763a VCG |
346 | { |
347 | struct cbs_sched_data *q = qdisc_priv(sch); | |
3d0bd028 | 348 | struct net_device *dev = qdisc_dev(sch); |
585d763a | 349 | |
710fb396 AA |
350 | if (!opt) { |
351 | NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory"); | |
585d763a | 352 | return -EINVAL; |
710fb396 | 353 | } |
585d763a | 354 | |
990e35ec VCG |
355 | q->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, |
356 | sch->handle, extack); | |
357 | if (!q->qdisc) | |
358 | return -ENOMEM; | |
359 | ||
360 | qdisc_hash_add(q->qdisc, false); | |
361 | ||
3d0bd028 VCG |
362 | q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); |
363 | ||
364 | q->enqueue = cbs_enqueue_soft; | |
365 | q->dequeue = cbs_dequeue_soft; | |
366 | ||
585d763a VCG |
367 | qdisc_watchdog_init(&q->watchdog, sch); |
368 | ||
2030721c | 369 | return cbs_change(sch, opt, extack); |
585d763a VCG |
370 | } |
371 | ||
372 | static void cbs_destroy(struct Qdisc *sch) | |
373 | { | |
374 | struct cbs_sched_data *q = qdisc_priv(sch); | |
3d0bd028 | 375 | struct net_device *dev = qdisc_dev(sch); |
585d763a VCG |
376 | |
377 | qdisc_watchdog_cancel(&q->watchdog); | |
3d0bd028 VCG |
378 | |
379 | cbs_disable_offload(dev, q); | |
990e35ec VCG |
380 | |
381 | if (q->qdisc) | |
86bd446b | 382 | qdisc_put(q->qdisc); |
585d763a VCG |
383 | } |
384 | ||
385 | static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) | |
386 | { | |
387 | struct cbs_sched_data *q = qdisc_priv(sch); | |
388 | struct tc_cbs_qopt opt = { }; | |
389 | struct nlattr *nest; | |
390 | ||
391 | nest = nla_nest_start(skb, TCA_OPTIONS); | |
392 | if (!nest) | |
393 | goto nla_put_failure; | |
394 | ||
395 | opt.hicredit = q->hicredit; | |
396 | opt.locredit = q->locredit; | |
397 | opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT); | |
398 | opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT); | |
3d0bd028 | 399 | opt.offload = q->offload; |
585d763a VCG |
400 | |
401 | if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) | |
402 | goto nla_put_failure; | |
403 | ||
404 | return nla_nest_end(skb, nest); | |
405 | ||
406 | nla_put_failure: | |
407 | nla_nest_cancel(skb, nest); | |
408 | return -1; | |
409 | } | |
410 | ||
990e35ec VCG |
411 | static int cbs_dump_class(struct Qdisc *sch, unsigned long cl, |
412 | struct sk_buff *skb, struct tcmsg *tcm) | |
413 | { | |
414 | struct cbs_sched_data *q = qdisc_priv(sch); | |
415 | ||
416 | if (cl != 1 || !q->qdisc) /* only one class */ | |
417 | return -ENOENT; | |
418 | ||
419 | tcm->tcm_handle |= TC_H_MIN(1); | |
420 | tcm->tcm_info = q->qdisc->handle; | |
421 | ||
422 | return 0; | |
423 | } | |
424 | ||
425 | static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | |
426 | struct Qdisc **old, struct netlink_ext_ack *extack) | |
427 | { | |
428 | struct cbs_sched_data *q = qdisc_priv(sch); | |
429 | ||
430 | if (!new) { | |
431 | new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, | |
432 | sch->handle, NULL); | |
433 | if (!new) | |
434 | new = &noop_qdisc; | |
435 | } | |
436 | ||
437 | *old = qdisc_replace(sch, new, &q->qdisc); | |
438 | return 0; | |
439 | } | |
440 | ||
441 | static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg) | |
442 | { | |
443 | struct cbs_sched_data *q = qdisc_priv(sch); | |
444 | ||
445 | return q->qdisc; | |
446 | } | |
447 | ||
448 | static unsigned long cbs_find(struct Qdisc *sch, u32 classid) | |
449 | { | |
450 | return 1; | |
451 | } | |
452 | ||
453 | static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker) | |
454 | { | |
455 | if (!walker->stop) { | |
456 | if (walker->count >= walker->skip) { | |
457 | if (walker->fn(sch, 1, walker) < 0) { | |
458 | walker->stop = 1; | |
459 | return; | |
460 | } | |
461 | } | |
462 | walker->count++; | |
463 | } | |
464 | } | |
465 | ||
466 | static const struct Qdisc_class_ops cbs_class_ops = { | |
467 | .graft = cbs_graft, | |
468 | .leaf = cbs_leaf, | |
469 | .find = cbs_find, | |
470 | .walk = cbs_walk, | |
471 | .dump = cbs_dump_class, | |
472 | }; | |
473 | ||
585d763a VCG |
474 | static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { |
475 | .id = "cbs", | |
990e35ec | 476 | .cl_ops = &cbs_class_ops, |
585d763a VCG |
477 | .priv_size = sizeof(struct cbs_sched_data), |
478 | .enqueue = cbs_enqueue, | |
479 | .dequeue = cbs_dequeue, | |
480 | .peek = qdisc_peek_dequeued, | |
481 | .init = cbs_init, | |
482 | .reset = qdisc_reset_queue, | |
483 | .destroy = cbs_destroy, | |
484 | .change = cbs_change, | |
485 | .dump = cbs_dump, | |
486 | .owner = THIS_MODULE, | |
487 | }; | |
488 | ||
489 | static int __init cbs_module_init(void) | |
490 | { | |
491 | return register_qdisc(&cbs_qdisc_ops); | |
492 | } | |
493 | ||
494 | static void __exit cbs_module_exit(void) | |
495 | { | |
496 | unregister_qdisc(&cbs_qdisc_ops); | |
497 | } | |
498 | module_init(cbs_module_init) | |
499 | module_exit(cbs_module_exit) | |
500 | MODULE_LICENSE("GPL"); |