Merge tag 'seccomp-v5.5-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/kees...
[linux-block.git] / net / sched / sch_etf.c
CommitLineData
25db26a9
VCG
1// SPDX-License-Identifier: GPL-2.0
2
3/* net/sched/sch_etf.c Earliest TxTime First queueing discipline.
4 *
5 * Authors: Jesus Sanchez-Palencia <jesus.sanchez-palencia@intel.com>
6 * Vinicius Costa Gomes <vinicius.gomes@intel.com>
7 */
8
9#include <linux/module.h>
10#include <linux/types.h>
11#include <linux/kernel.h>
12#include <linux/string.h>
13#include <linux/errno.h>
4b15c707 14#include <linux/errqueue.h>
25db26a9
VCG
15#include <linux/rbtree.h>
16#include <linux/skbuff.h>
17#include <linux/posix-timers.h>
18#include <net/netlink.h>
19#include <net/sch_generic.h>
20#include <net/pkt_sched.h>
21#include <net/sock.h>
22
/* Flag tests on the 'flags' member of the pointed-to options structure.
 * Each expands to the masked flag value (non-zero when the bit is set),
 * not to a normalized 0/1.
 */
#define DEADLINE_MODE_IS_ON(x) ((x)->flags & TC_ETF_DEADLINE_MODE_ON)
#define OFFLOAD_IS_ON(x) ((x)->flags & TC_ETF_OFFLOAD_ON)
#define SKIP_SOCK_CHECK_IS_SET(x) ((x)->flags & TC_ETF_SKIP_SOCK_CHECK)
25db26a9
VCG
26
/**
 * struct etf_sched_data - private state of one ETF qdisc instance
 * @offload: TC_ETF_OFFLOAD_ON was requested and offload was enabled at init
 * @deadline_mode: TC_ETF_DEADLINE_MODE_ON was requested at init
 * @skip_sock_check: TC_ETF_SKIP_SOCK_CHECK was requested at init; the
 *	per-socket checks in is_packet_valid() are bypassed
 * @clockid: clock the txtimes are expressed in, as given in the options
 * @queue: index of the tx queue this qdisc is attached to
 * @delta: how long before an skb's txtime the watchdog fires and the skb
 *	becomes eligible for dequeue
 * @last: txtime of the most recently dequeued skb
 * @head: rb-tree of queued skbs, ordered by skb->tstamp
 * @watchdog: qdisc watchdog re-armed for the first skb in @head
 * @get_time: reads the current time of @clockid
 */
struct etf_sched_data {
	bool offload;
	bool deadline_mode;
	bool skip_sock_check;
	int clockid;
	int queue;
	s32 delta; /* in ns */
	ktime_t last; /* The txtime of the last skb sent to the netdevice. */
	struct rb_root_cached head;
	struct qdisc_watchdog watchdog;
	ktime_t (*get_time)(void);
};
39
/* Netlink attribute policy used by etf_init() when parsing the nested
 * options: TCA_ETF_PARMS carries a struct tc_etf_qopt.
 */
static const struct nla_policy etf_policy[TCA_ETF_MAX + 1] = {
	[TCA_ETF_PARMS] = { .len = sizeof(struct tc_etf_qopt) },
};
43
44static inline int validate_input_params(struct tc_etf_qopt *qopt,
45 struct netlink_ext_ack *extack)
46{
47 /* Check if params comply to the following rules:
48 * * Clockid and delta must be valid.
49 *
50 * * Dynamic clockids are not supported.
51 *
52 * * Delta must be a positive integer.
88cab771
JSP
53 *
54 * Also note that for the HW offload case, we must
55 * expect that system clocks have been synchronized to PHC.
25db26a9
VCG
56 */
57 if (qopt->clockid < 0) {
58 NL_SET_ERR_MSG(extack, "Dynamic clockids are not supported");
59 return -ENOTSUPP;
60 }
61
62 if (qopt->clockid != CLOCK_TAI) {
63 NL_SET_ERR_MSG(extack, "Invalid clockid. CLOCK_TAI must be used");
64 return -EINVAL;
65 }
66
67 if (qopt->delta < 0) {
68 NL_SET_ERR_MSG(extack, "Delta must be positive");
69 return -EINVAL;
70 }
71
72 return 0;
73}
74
75static bool is_packet_valid(struct Qdisc *sch, struct sk_buff *nskb)
76{
77 struct etf_sched_data *q = qdisc_priv(sch);
78 ktime_t txtime = nskb->tstamp;
79 struct sock *sk = nskb->sk;
80 ktime_t now;
81
d14d2b20
VP
82 if (q->skip_sock_check)
83 goto skip;
84
25db26a9
VCG
85 if (!sk)
86 return false;
87
88 if (!sock_flag(sk, SOCK_TXTIME))
89 return false;
90
91 /* We don't perform crosstimestamping.
92 * Drop if packet's clockid differs from qdisc's.
93 */
94 if (sk->sk_clockid != q->clockid)
95 return false;
96
97 if (sk->sk_txtime_deadline_mode != q->deadline_mode)
98 return false;
99
d14d2b20 100skip:
25db26a9
VCG
101 now = q->get_time();
102 if (ktime_before(txtime, now) || ktime_before(txtime, q->last))
103 return false;
104
105 return true;
106}
107
108static struct sk_buff *etf_peek_timesortedlist(struct Qdisc *sch)
109{
110 struct etf_sched_data *q = qdisc_priv(sch);
111 struct rb_node *p;
112
09fd4860 113 p = rb_first_cached(&q->head);
25db26a9
VCG
114 if (!p)
115 return NULL;
116
117 return rb_to_skb(p);
118}
119
120static void reset_watchdog(struct Qdisc *sch)
121{
122 struct etf_sched_data *q = qdisc_priv(sch);
123 struct sk_buff *skb = etf_peek_timesortedlist(sch);
124 ktime_t next;
125
3fcbdaee
JSP
126 if (!skb) {
127 qdisc_watchdog_cancel(&q->watchdog);
25db26a9 128 return;
3fcbdaee 129 }
25db26a9
VCG
130
131 next = ktime_sub_ns(skb->tstamp, q->delta);
132 qdisc_watchdog_schedule_ns(&q->watchdog, ktime_to_ns(next));
133}
134
4b15c707
JSP
135static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
136{
137 struct sock_exterr_skb *serr;
138 struct sk_buff *clone;
139 ktime_t txtime = skb->tstamp;
140
141 if (!skb->sk || !(skb->sk->sk_txtime_report_errors))
142 return;
143
144 clone = skb_clone(skb, GFP_ATOMIC);
145 if (!clone)
146 return;
147
148 serr = SKB_EXT_ERR(clone);
149 serr->ee.ee_errno = err;
150 serr->ee.ee_origin = SO_EE_ORIGIN_TXTIME;
151 serr->ee.ee_type = 0;
152 serr->ee.ee_code = code;
153 serr->ee.ee_pad = 0;
154 serr->ee.ee_data = (txtime >> 32); /* high part of tstamp */
155 serr->ee.ee_info = txtime; /* low part of tstamp */
156
157 if (sock_queue_err_skb(skb->sk, clone))
158 kfree_skb(clone);
159}
160
25db26a9
VCG
161static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
162 struct sk_buff **to_free)
163{
164 struct etf_sched_data *q = qdisc_priv(sch);
09fd4860 165 struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
25db26a9 166 ktime_t txtime = nskb->tstamp;
09fd4860 167 bool leftmost = true;
25db26a9 168
4b15c707
JSP
169 if (!is_packet_valid(sch, nskb)) {
170 report_sock_error(nskb, EINVAL,
171 SO_EE_CODE_TXTIME_INVALID_PARAM);
25db26a9 172 return qdisc_drop(nskb, sch, to_free);
4b15c707 173 }
25db26a9
VCG
174
175 while (*p) {
176 struct sk_buff *skb;
177
178 parent = *p;
179 skb = rb_to_skb(parent);
28aa7c86 180 if (ktime_compare(txtime, skb->tstamp) >= 0) {
25db26a9 181 p = &parent->rb_right;
09fd4860
JSP
182 leftmost = false;
183 } else {
25db26a9 184 p = &parent->rb_left;
09fd4860 185 }
25db26a9
VCG
186 }
187 rb_link_node(&nskb->rbnode, parent, p);
09fd4860 188 rb_insert_color_cached(&nskb->rbnode, &q->head, leftmost);
25db26a9
VCG
189
190 qdisc_qstats_backlog_inc(sch, nskb);
191 sch->q.qlen++;
192
193 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
194 reset_watchdog(sch);
195
196 return NET_XMIT_SUCCESS;
197}
198
37342bda
JSP
199static void timesortedlist_drop(struct Qdisc *sch, struct sk_buff *skb,
200 ktime_t now)
25db26a9
VCG
201{
202 struct etf_sched_data *q = qdisc_priv(sch);
cbeeb8ef 203 struct sk_buff *to_free = NULL;
37342bda 204 struct sk_buff *tmp = NULL;
25db26a9 205
37342bda
JSP
206 skb_rbtree_walk_from_safe(skb, tmp) {
207 if (ktime_after(skb->tstamp, now))
208 break;
25db26a9 209
37342bda 210 rb_erase_cached(&skb->rbnode, &q->head);
25db26a9 211
37342bda
JSP
212 /* The rbnode field in the skb re-uses these fields, now that
213 * we are done with the rbnode, reset them.
214 */
215 skb->next = NULL;
216 skb->prev = NULL;
217 skb->dev = qdisc_dev(sch);
25db26a9 218
37342bda 219 report_sock_error(skb, ECANCELED, SO_EE_CODE_TXTIME_MISSED);
25db26a9 220
37342bda
JSP
221 qdisc_qstats_backlog_dec(sch, skb);
222 qdisc_drop(skb, sch, &to_free);
223 qdisc_qstats_overlimit(sch);
224 sch->q.qlen--;
225 }
4b15c707 226
37342bda 227 kfree_skb_list(to_free);
cbeeb8ef 228}
25db26a9 229
cbeeb8ef
JSP
230static void timesortedlist_remove(struct Qdisc *sch, struct sk_buff *skb)
231{
232 struct etf_sched_data *q = qdisc_priv(sch);
233
234 rb_erase_cached(&skb->rbnode, &q->head);
235
236 /* The rbnode field in the skb re-uses these fields, now that
237 * we are done with the rbnode, reset them.
238 */
239 skb->next = NULL;
240 skb->prev = NULL;
241 skb->dev = qdisc_dev(sch);
242
243 qdisc_qstats_backlog_dec(sch, skb);
244
245 qdisc_bstats_update(sch, skb);
246
247 q->last = skb->tstamp;
25db26a9
VCG
248
249 sch->q.qlen--;
250}
251
252static struct sk_buff *etf_dequeue_timesortedlist(struct Qdisc *sch)
253{
254 struct etf_sched_data *q = qdisc_priv(sch);
255 struct sk_buff *skb;
256 ktime_t now, next;
257
258 skb = etf_peek_timesortedlist(sch);
259 if (!skb)
260 return NULL;
261
262 now = q->get_time();
263
264 /* Drop if packet has expired while in queue. */
25db26a9 265 if (ktime_before(skb->tstamp, now)) {
37342bda 266 timesortedlist_drop(sch, skb, now);
25db26a9
VCG
267 skb = NULL;
268 goto out;
269 }
270
271 /* When in deadline mode, dequeue as soon as possible and change the
272 * txtime from deadline to (now + delta).
273 */
274 if (q->deadline_mode) {
cbeeb8ef 275 timesortedlist_remove(sch, skb);
25db26a9
VCG
276 skb->tstamp = now;
277 goto out;
278 }
279
280 next = ktime_sub_ns(skb->tstamp, q->delta);
281
282 /* Dequeue only if now is within the [txtime - delta, txtime] range. */
283 if (ktime_after(now, next))
cbeeb8ef 284 timesortedlist_remove(sch, skb);
25db26a9
VCG
285 else
286 skb = NULL;
287
288out:
289 /* Now we may need to re-arm the qdisc watchdog for the next packet. */
290 reset_watchdog(sch);
291
292 return skb;
293}
294
88cab771
JSP
295static void etf_disable_offload(struct net_device *dev,
296 struct etf_sched_data *q)
297{
298 struct tc_etf_qopt_offload etf = { };
299 const struct net_device_ops *ops;
300 int err;
301
302 if (!q->offload)
303 return;
304
305 ops = dev->netdev_ops;
306 if (!ops->ndo_setup_tc)
307 return;
308
309 etf.queue = q->queue;
310 etf.enable = 0;
311
312 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
313 if (err < 0)
314 pr_warn("Couldn't disable ETF offload for queue %d\n",
315 etf.queue);
316}
317
318static int etf_enable_offload(struct net_device *dev, struct etf_sched_data *q,
319 struct netlink_ext_ack *extack)
320{
321 const struct net_device_ops *ops = dev->netdev_ops;
322 struct tc_etf_qopt_offload etf = { };
323 int err;
324
325 if (q->offload)
326 return 0;
327
328 if (!ops->ndo_setup_tc) {
329 NL_SET_ERR_MSG(extack, "Specified device does not support ETF offload");
330 return -EOPNOTSUPP;
331 }
332
333 etf.queue = q->queue;
334 etf.enable = 1;
335
336 err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETF, &etf);
337 if (err < 0) {
338 NL_SET_ERR_MSG(extack, "Specified device failed to setup ETF hardware offload");
339 return err;
340 }
341
342 return 0;
343}
344
25db26a9
VCG
345static int etf_init(struct Qdisc *sch, struct nlattr *opt,
346 struct netlink_ext_ack *extack)
347{
348 struct etf_sched_data *q = qdisc_priv(sch);
349 struct net_device *dev = qdisc_dev(sch);
350 struct nlattr *tb[TCA_ETF_MAX + 1];
351 struct tc_etf_qopt *qopt;
352 int err;
353
354 if (!opt) {
355 NL_SET_ERR_MSG(extack,
356 "Missing ETF qdisc options which are mandatory");
357 return -EINVAL;
358 }
359
8cb08174
JB
360 err = nla_parse_nested_deprecated(tb, TCA_ETF_MAX, opt, etf_policy,
361 extack);
25db26a9
VCG
362 if (err < 0)
363 return err;
364
365 if (!tb[TCA_ETF_PARMS]) {
366 NL_SET_ERR_MSG(extack, "Missing mandatory ETF parameters");
367 return -EINVAL;
368 }
369
370 qopt = nla_data(tb[TCA_ETF_PARMS]);
371
88cab771 372 pr_debug("delta %d clockid %d offload %s deadline %s\n",
25db26a9 373 qopt->delta, qopt->clockid,
88cab771 374 OFFLOAD_IS_ON(qopt) ? "on" : "off",
25db26a9
VCG
375 DEADLINE_MODE_IS_ON(qopt) ? "on" : "off");
376
377 err = validate_input_params(qopt, extack);
378 if (err < 0)
379 return err;
380
381 q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0);
382
88cab771
JSP
383 if (OFFLOAD_IS_ON(qopt)) {
384 err = etf_enable_offload(dev, q, extack);
385 if (err < 0)
386 return err;
387 }
388
25db26a9
VCG
389 /* Everything went OK, save the parameters used. */
390 q->delta = qopt->delta;
391 q->clockid = qopt->clockid;
88cab771 392 q->offload = OFFLOAD_IS_ON(qopt);
25db26a9 393 q->deadline_mode = DEADLINE_MODE_IS_ON(qopt);
d14d2b20 394 q->skip_sock_check = SKIP_SOCK_CHECK_IS_SET(qopt);
25db26a9
VCG
395
396 switch (q->clockid) {
397 case CLOCK_REALTIME:
398 q->get_time = ktime_get_real;
399 break;
400 case CLOCK_MONOTONIC:
401 q->get_time = ktime_get;
402 break;
403 case CLOCK_BOOTTIME:
404 q->get_time = ktime_get_boottime;
405 break;
406 case CLOCK_TAI:
407 q->get_time = ktime_get_clocktai;
408 break;
409 default:
410 NL_SET_ERR_MSG(extack, "Clockid is not supported");
411 return -ENOTSUPP;
412 }
413
414 qdisc_watchdog_init_clockid(&q->watchdog, sch, q->clockid);
415
416 return 0;
417}
418
419static void timesortedlist_clear(struct Qdisc *sch)
420{
421 struct etf_sched_data *q = qdisc_priv(sch);
09fd4860 422 struct rb_node *p = rb_first_cached(&q->head);
25db26a9
VCG
423
424 while (p) {
425 struct sk_buff *skb = rb_to_skb(p);
426
427 p = rb_next(p);
428
09fd4860 429 rb_erase_cached(&skb->rbnode, &q->head);
25db26a9
VCG
430 rtnl_kfree_skbs(skb, skb);
431 sch->q.qlen--;
432 }
433}
434
435static void etf_reset(struct Qdisc *sch)
436{
437 struct etf_sched_data *q = qdisc_priv(sch);
438
439 /* Only cancel watchdog if it's been initialized. */
440 if (q->watchdog.qdisc == sch)
441 qdisc_watchdog_cancel(&q->watchdog);
442
443 /* No matter which mode we are on, it's safe to clear both lists. */
444 timesortedlist_clear(sch);
445 __qdisc_reset_queue(&sch->q);
446
447 sch->qstats.backlog = 0;
448 sch->q.qlen = 0;
449
450 q->last = 0;
451}
452
453static void etf_destroy(struct Qdisc *sch)
454{
455 struct etf_sched_data *q = qdisc_priv(sch);
88cab771 456 struct net_device *dev = qdisc_dev(sch);
25db26a9
VCG
457
458 /* Only cancel watchdog if it's been initialized. */
459 if (q->watchdog.qdisc == sch)
460 qdisc_watchdog_cancel(&q->watchdog);
88cab771
JSP
461
462 etf_disable_offload(dev, q);
25db26a9
VCG
463}
464
465static int etf_dump(struct Qdisc *sch, struct sk_buff *skb)
466{
467 struct etf_sched_data *q = qdisc_priv(sch);
468 struct tc_etf_qopt opt = { };
469 struct nlattr *nest;
470
ae0be8de 471 nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
25db26a9
VCG
472 if (!nest)
473 goto nla_put_failure;
474
475 opt.delta = q->delta;
476 opt.clockid = q->clockid;
88cab771
JSP
477 if (q->offload)
478 opt.flags |= TC_ETF_OFFLOAD_ON;
479
25db26a9
VCG
480 if (q->deadline_mode)
481 opt.flags |= TC_ETF_DEADLINE_MODE_ON;
482
d14d2b20
VP
483 if (q->skip_sock_check)
484 opt.flags |= TC_ETF_SKIP_SOCK_CHECK;
485
25db26a9
VCG
486 if (nla_put(skb, TCA_ETF_PARMS, sizeof(opt), &opt))
487 goto nla_put_failure;
488
489 return nla_nest_end(skb, nest);
490
491nla_put_failure:
492 nla_nest_cancel(skb, nest);
493 return -1;
494}
495
/* Operations table registered for the "etf" qdisc; the private area of
 * each instance is a struct etf_sched_data.
 */
static struct Qdisc_ops etf_qdisc_ops __read_mostly = {
	.id = "etf",
	.priv_size = sizeof(struct etf_sched_data),
	.enqueue = etf_enqueue_timesortedlist,
	.dequeue = etf_dequeue_timesortedlist,
	.peek = etf_peek_timesortedlist,
	.init = etf_init,
	.reset = etf_reset,
	.destroy = etf_destroy,
	.dump = etf_dump,
	.owner = THIS_MODULE,
};
508
/* Module entry point: register the ETF qdisc operations. */
static int __init etf_module_init(void)
{
	return register_qdisc(&etf_qdisc_ops);
}

/* Module exit point: unregister the ETF qdisc operations. */
static void __exit etf_module_exit(void)
{
	unregister_qdisc(&etf_qdisc_ops);
}
module_init(etf_module_init)
module_exit(etf_module_exit)
MODULE_LICENSE("GPL");