Commit | Line | Data |
---|---|---|
585d763a VCG |
1 | /* |
2 | * net/sched/sch_cbs.c Credit Based Shaper | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | * | |
9 | * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> | |
10 | * | |
11 | */ | |
12 | ||
13 | /* Credit Based Shaper (CBS) | |
14 | * ========================= | |
15 | * | |
16 | * This is a simple rate-limiting shaper aimed at TSN applications on | |
17 | * systems with known traffic workloads. | |
18 | * | |
19 | * Its algorithm is defined by the IEEE 802.1Q-2014 Specification, | |
20 | * Section 8.6.8.2, and explained in more detail in the Annex L of the | |
21 | * same specification. | |
22 | * | |
23 | * There are four tunables to be considered: | |
24 | * | |
25 | * 'idleslope': Idleslope is the rate of credits that is | |
26 | * accumulated (in kilobits per second) when there is at least | |
27 | * one packet waiting for transmission. Packets are transmitted | |
28 | * when the current value of credits is equal to or greater than | |
29 | * zero. When there is no packet to be transmitted the amount of | |
30 | * credits is set to zero. This is the main tunable of the CBS | |
31 | * algorithm. | |
32 | * | |
33 | * 'sendslope': | |
34 | * Sendslope is the rate of credits that is depleted (it should be a | |
35 | * negative number of kilobits per second) when a transmission is | |
36 | * occurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section | |
37 | * 8.6.8.2 item g): | |
38 | * | |
39 | * sendslope = idleslope - port_transmit_rate | |
40 | * | |
41 | * 'hicredit': Hicredit defines the maximum amount of credits (in | |
42 | * bytes) that can be accumulated. Hicredit depends on the | |
43 | * characteristics of interfering traffic; | |
44 | * 'max_interference_size' is the maximum size of any burst of | |
45 | * traffic that can delay the transmission of a frame that is | |
46 | * available for transmission for this traffic class, (IEEE | |
47 | * 802.1Q-2014 Annex L, Equation L-3): | |
48 | * | |
49 | * hicredit = max_interference_size * (idleslope / port_transmit_rate) | |
50 | * | |
51 | * 'locredit': Locredit is the minimum amount of credits that can | |
52 | * be reached. It is a function of the traffic flowing through | |
53 | * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2): | |
54 | * | |
55 | * locredit = max_frame_size * (sendslope / port_transmit_rate) | |
56 | */ | |
57 | ||
58 | #include <linux/module.h> | |
59 | #include <linux/types.h> | |
60 | #include <linux/kernel.h> | |
61 | #include <linux/string.h> | |
62 | #include <linux/errno.h> | |
63 | #include <linux/skbuff.h> | |
64 | #include <net/netlink.h> | |
65 | #include <net/sch_generic.h> | |
66 | #include <net/pkt_sched.h> | |
67 | ||
68 | #define BYTES_PER_KBIT (1000LL / 8) | |
69 | ||
70 | struct cbs_sched_data { | |
3d0bd028 VCG |
71 | bool offload; |
72 | int queue; | |
585d763a VCG |
73 | s64 port_rate; /* in bytes/s */ |
74 | s64 last; /* timestamp in ns */ | |
75 | s64 credits; /* in bytes */ | |
76 | s32 locredit; /* in bytes */ | |
77 | s32 hicredit; /* in bytes */ | |
78 | s64 sendslope; /* in bytes/s */ | |
79 | s64 idleslope; /* in bytes/s */ | |
80 | struct qdisc_watchdog watchdog; | |
81 | int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch); | |
82 | struct sk_buff *(*dequeue)(struct Qdisc *sch); | |
83 | }; | |
84 | ||
3d0bd028 VCG |
/* Enqueue path installed by cbs_enable_offload(): no credit bookkeeping is
 * done here, the packet is just appended to the qdisc queue.
 *
 * Returns whatever qdisc_enqueue_tail() returns.
 */
static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch)
{
	int ret;

	ret = qdisc_enqueue_tail(skb, sch);

	return ret;
}
89 | ||
585d763a VCG |
90 | static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch) |
91 | { | |
92 | struct cbs_sched_data *q = qdisc_priv(sch); | |
93 | ||
94 | if (sch->q.qlen == 0 && q->credits > 0) { | |
95 | /* We need to stop accumulating credits when there's | |
96 | * no enqueued packets and q->credits is positive. | |
97 | */ | |
98 | q->credits = 0; | |
99 | q->last = ktime_get_ns(); | |
100 | } | |
101 | ||
102 | return qdisc_enqueue_tail(skb, sch); | |
103 | } | |
104 | ||
105 | static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, | |
106 | struct sk_buff **to_free) | |
107 | { | |
108 | struct cbs_sched_data *q = qdisc_priv(sch); | |
109 | ||
110 | return q->enqueue(skb, sch); | |
111 | } | |
112 | ||
113 | /* timediff is in ns, slope is in bytes/s */ | |
114 | static s64 timediff_to_credits(s64 timediff, s64 slope) | |
115 | { | |
116 | return div64_s64(timediff * slope, NSEC_PER_SEC); | |
117 | } | |
118 | ||
119 | static s64 delay_from_credits(s64 credits, s64 slope) | |
120 | { | |
121 | if (unlikely(slope == 0)) | |
122 | return S64_MAX; | |
123 | ||
124 | return div64_s64(-credits * NSEC_PER_SEC, slope); | |
125 | } | |
126 | ||
127 | static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate) | |
128 | { | |
129 | if (unlikely(port_rate == 0)) | |
130 | return S64_MAX; | |
131 | ||
132 | return div64_s64(len * slope, port_rate); | |
133 | } | |
134 | ||
135 | static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) | |
136 | { | |
137 | struct cbs_sched_data *q = qdisc_priv(sch); | |
138 | s64 now = ktime_get_ns(); | |
139 | struct sk_buff *skb; | |
140 | s64 credits; | |
141 | int len; | |
142 | ||
143 | if (q->credits < 0) { | |
144 | credits = timediff_to_credits(now - q->last, q->idleslope); | |
145 | ||
146 | credits = q->credits + credits; | |
147 | q->credits = min_t(s64, credits, q->hicredit); | |
148 | ||
149 | if (q->credits < 0) { | |
150 | s64 delay; | |
151 | ||
152 | delay = delay_from_credits(q->credits, q->idleslope); | |
153 | qdisc_watchdog_schedule_ns(&q->watchdog, now + delay); | |
154 | ||
155 | q->last = now; | |
156 | ||
157 | return NULL; | |
158 | } | |
159 | } | |
160 | ||
161 | skb = qdisc_dequeue_head(sch); | |
162 | if (!skb) | |
163 | return NULL; | |
164 | ||
165 | len = qdisc_pkt_len(skb); | |
166 | ||
167 | /* As sendslope is a negative number, this will decrease the | |
168 | * amount of q->credits. | |
169 | */ | |
170 | credits = credits_from_len(len, q->sendslope, q->port_rate); | |
171 | credits += q->credits; | |
172 | ||
173 | q->credits = max_t(s64, credits, q->locredit); | |
174 | q->last = now; | |
175 | ||
176 | return skb; | |
177 | } | |
178 | ||
3d0bd028 VCG |
/* Dequeue path installed by cbs_enable_offload(): no credit bookkeeping is
 * done here, the head of the queue is returned as is (NULL when empty).
 */
static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch)
{
	struct sk_buff *skb = qdisc_dequeue_head(sch);

	return skb;
}
183 | ||
585d763a VCG |
184 | static struct sk_buff *cbs_dequeue(struct Qdisc *sch) |
185 | { | |
186 | struct cbs_sched_data *q = qdisc_priv(sch); | |
187 | ||
188 | return q->dequeue(sch); | |
189 | } | |
190 | ||
191 | static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { | |
192 | [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, | |
193 | }; | |
194 | ||
3d0bd028 VCG |
195 | static void cbs_disable_offload(struct net_device *dev, |
196 | struct cbs_sched_data *q) | |
197 | { | |
198 | struct tc_cbs_qopt_offload cbs = { }; | |
199 | const struct net_device_ops *ops; | |
200 | int err; | |
201 | ||
202 | if (!q->offload) | |
203 | return; | |
204 | ||
205 | q->enqueue = cbs_enqueue_soft; | |
206 | q->dequeue = cbs_dequeue_soft; | |
207 | ||
208 | ops = dev->netdev_ops; | |
209 | if (!ops->ndo_setup_tc) | |
210 | return; | |
211 | ||
212 | cbs.queue = q->queue; | |
213 | cbs.enable = 0; | |
214 | ||
8521db4c | 215 | err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); |
3d0bd028 VCG |
216 | if (err < 0) |
217 | pr_warn("Couldn't disable CBS offload for queue %d\n", | |
218 | cbs.queue); | |
219 | } | |
220 | ||
221 | static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, | |
710fb396 AA |
222 | const struct tc_cbs_qopt *opt, |
223 | struct netlink_ext_ack *extack) | |
3d0bd028 VCG |
224 | { |
225 | const struct net_device_ops *ops = dev->netdev_ops; | |
226 | struct tc_cbs_qopt_offload cbs = { }; | |
227 | int err; | |
228 | ||
710fb396 AA |
229 | if (!ops->ndo_setup_tc) { |
230 | NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload"); | |
3d0bd028 | 231 | return -EOPNOTSUPP; |
710fb396 | 232 | } |
3d0bd028 VCG |
233 | |
234 | cbs.queue = q->queue; | |
235 | ||
236 | cbs.enable = 1; | |
237 | cbs.hicredit = opt->hicredit; | |
238 | cbs.locredit = opt->locredit; | |
239 | cbs.idleslope = opt->idleslope; | |
240 | cbs.sendslope = opt->sendslope; | |
241 | ||
8521db4c | 242 | err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); |
710fb396 AA |
243 | if (err < 0) { |
244 | NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload"); | |
3d0bd028 | 245 | return err; |
710fb396 | 246 | } |
3d0bd028 VCG |
247 | |
248 | q->enqueue = cbs_enqueue_offload; | |
249 | q->dequeue = cbs_dequeue_offload; | |
250 | ||
251 | return 0; | |
252 | } | |
253 | ||
2030721c AA |
254 | static int cbs_change(struct Qdisc *sch, struct nlattr *opt, |
255 | struct netlink_ext_ack *extack) | |
585d763a VCG |
256 | { |
257 | struct cbs_sched_data *q = qdisc_priv(sch); | |
258 | struct net_device *dev = qdisc_dev(sch); | |
259 | struct nlattr *tb[TCA_CBS_MAX + 1]; | |
585d763a | 260 | struct tc_cbs_qopt *qopt; |
585d763a VCG |
261 | int err; |
262 | ||
710fb396 | 263 | err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, extack); |
585d763a VCG |
264 | if (err < 0) |
265 | return err; | |
266 | ||
710fb396 AA |
267 | if (!tb[TCA_CBS_PARMS]) { |
268 | NL_SET_ERR_MSG(extack, "Missing CBS parameter which are mandatory"); | |
585d763a | 269 | return -EINVAL; |
710fb396 | 270 | } |
585d763a VCG |
271 | |
272 | qopt = nla_data(tb[TCA_CBS_PARMS]); | |
273 | ||
3d0bd028 VCG |
274 | if (!qopt->offload) { |
275 | struct ethtool_link_ksettings ecmd; | |
276 | s64 link_speed; | |
585d763a | 277 | |
3d0bd028 VCG |
278 | if (!__ethtool_get_link_ksettings(dev, &ecmd)) |
279 | link_speed = ecmd.base.speed; | |
280 | else | |
281 | link_speed = SPEED_1000; | |
585d763a | 282 | |
3d0bd028 | 283 | q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; |
585d763a | 284 | |
3d0bd028 VCG |
285 | cbs_disable_offload(dev, q); |
286 | } else { | |
710fb396 | 287 | err = cbs_enable_offload(dev, q, qopt, extack); |
3d0bd028 VCG |
288 | if (err < 0) |
289 | return err; | |
290 | } | |
585d763a | 291 | |
3d0bd028 | 292 | /* Everything went OK, save the parameters used. */ |
585d763a VCG |
293 | q->hicredit = qopt->hicredit; |
294 | q->locredit = qopt->locredit; | |
295 | q->idleslope = qopt->idleslope * BYTES_PER_KBIT; | |
296 | q->sendslope = qopt->sendslope * BYTES_PER_KBIT; | |
3d0bd028 | 297 | q->offload = qopt->offload; |
585d763a VCG |
298 | |
299 | return 0; | |
300 | } | |
301 | ||
e63d7dfd AA |
302 | static int cbs_init(struct Qdisc *sch, struct nlattr *opt, |
303 | struct netlink_ext_ack *extack) | |
585d763a VCG |
304 | { |
305 | struct cbs_sched_data *q = qdisc_priv(sch); | |
3d0bd028 | 306 | struct net_device *dev = qdisc_dev(sch); |
585d763a | 307 | |
710fb396 AA |
308 | if (!opt) { |
309 | NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory"); | |
585d763a | 310 | return -EINVAL; |
710fb396 | 311 | } |
585d763a | 312 | |
3d0bd028 VCG |
313 | q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); |
314 | ||
315 | q->enqueue = cbs_enqueue_soft; | |
316 | q->dequeue = cbs_dequeue_soft; | |
317 | ||
585d763a VCG |
318 | qdisc_watchdog_init(&q->watchdog, sch); |
319 | ||
2030721c | 320 | return cbs_change(sch, opt, extack); |
585d763a VCG |
321 | } |
322 | ||
323 | static void cbs_destroy(struct Qdisc *sch) | |
324 | { | |
325 | struct cbs_sched_data *q = qdisc_priv(sch); | |
3d0bd028 | 326 | struct net_device *dev = qdisc_dev(sch); |
585d763a VCG |
327 | |
328 | qdisc_watchdog_cancel(&q->watchdog); | |
3d0bd028 VCG |
329 | |
330 | cbs_disable_offload(dev, q); | |
585d763a VCG |
331 | } |
332 | ||
333 | static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) | |
334 | { | |
335 | struct cbs_sched_data *q = qdisc_priv(sch); | |
336 | struct tc_cbs_qopt opt = { }; | |
337 | struct nlattr *nest; | |
338 | ||
339 | nest = nla_nest_start(skb, TCA_OPTIONS); | |
340 | if (!nest) | |
341 | goto nla_put_failure; | |
342 | ||
343 | opt.hicredit = q->hicredit; | |
344 | opt.locredit = q->locredit; | |
345 | opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT); | |
346 | opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT); | |
3d0bd028 | 347 | opt.offload = q->offload; |
585d763a VCG |
348 | |
349 | if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) | |
350 | goto nla_put_failure; | |
351 | ||
352 | return nla_nest_end(skb, nest); | |
353 | ||
354 | nla_put_failure: | |
355 | nla_nest_cancel(skb, nest); | |
356 | return -1; | |
357 | } | |
358 | ||
359 | static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { | |
360 | .id = "cbs", | |
361 | .priv_size = sizeof(struct cbs_sched_data), | |
362 | .enqueue = cbs_enqueue, | |
363 | .dequeue = cbs_dequeue, | |
364 | .peek = qdisc_peek_dequeued, | |
365 | .init = cbs_init, | |
366 | .reset = qdisc_reset_queue, | |
367 | .destroy = cbs_destroy, | |
368 | .change = cbs_change, | |
369 | .dump = cbs_dump, | |
370 | .owner = THIS_MODULE, | |
371 | }; | |
372 | ||
373 | static int __init cbs_module_init(void) | |
374 | { | |
375 | return register_qdisc(&cbs_qdisc_ops); | |
376 | } | |
377 | ||
378 | static void __exit cbs_module_exit(void) | |
379 | { | |
380 | unregister_qdisc(&cbs_qdisc_ops); | |
381 | } | |
382 | module_init(cbs_module_init) | |
383 | module_exit(cbs_module_exit) | |
384 | MODULE_LICENSE("GPL"); |