// SPDX-License-Identifier: GPL-2.0

/* net/sched/sch_taprio.c	Time Aware Priority Scheduler
 *
 * Authors:	Vinicius Costa Gomes <vinicius.gomes@intel.com>
 *
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/sch_generic.h>
#include <net/sock.h>
#include <net/tcp.h>

static LIST_HEAD(taprio_list);
static DEFINE_SPINLOCK(taprio_list_lock);

#define TAPRIO_ALL_GATES_OPEN -1

#define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)
#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)

struct sched_entry {
	struct list_head list;

	/* The instant that this entry "closes" and the next one
	 * should open, the qdisc will make some effort so that no
	 * packet leaves after this time.
	 */
	ktime_t close_time;
	ktime_t next_txtime;
	atomic_t budget;
	int index;
	u32 gate_mask;
	u32 interval;
	u8 command;
};

struct sched_gate_list {
	struct rcu_head rcu;
	struct list_head entries;
	size_t num_entries;
	ktime_t cycle_close_time;
	s64 cycle_time;
	s64 cycle_time_extension;
	s64 base_time;
};

struct taprio_sched {
	struct Qdisc **qdiscs;
	struct Qdisc *root;
	u32 flags;
	enum tk_offsets tk_offset;
	int clockid;
	atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
				    * speeds it's sub-nanoseconds per byte
				    */

	/* Protects the update side of the RCU protected current_entry */
	spinlock_t current_entry_lock;
	struct sched_entry __rcu *current_entry;
	struct sched_gate_list __rcu *oper_sched;
	struct sched_gate_list __rcu *admin_sched;
	struct hrtimer advance_timer;
	struct list_head taprio_list;
	struct sk_buff *(*dequeue)(struct Qdisc *sch);
	struct sk_buff *(*peek)(struct Qdisc *sch);
	u32 txtime_delay;
};

struct __tc_taprio_qopt_offload {
	refcount_t users;
	struct tc_taprio_qopt_offload offload;
};

static ktime_t sched_base_time(const struct sched_gate_list *sched)
{
	if (!sched)
		return KTIME_MAX;

	return ns_to_ktime(sched->base_time);
}

static ktime_t taprio_get_time(struct taprio_sched *q)
{
	ktime_t mono = ktime_get();

	switch (q->tk_offset) {
	case TK_OFFS_MAX:
		return mono;
	default:
		return ktime_mono_to_any(mono, q->tk_offset);
	}

	return KTIME_MAX;
}

static void taprio_free_sched_cb(struct rcu_head *head)
{
	struct sched_gate_list *sched = container_of(head, struct sched_gate_list, rcu);
	struct sched_entry *entry, *n;

	if (!sched)
		return;

	list_for_each_entry_safe(entry, n, &sched->entries, list) {
		list_del(&entry->list);
		kfree(entry);
	}

	kfree(sched);
}

static void switch_schedules(struct taprio_sched *q,
			     struct sched_gate_list **admin,
			     struct sched_gate_list **oper)
{
	rcu_assign_pointer(q->oper_sched, *admin);
	rcu_assign_pointer(q->admin_sched, NULL);

	if (*oper)
		call_rcu(&(*oper)->rcu, taprio_free_sched_cb);

	*oper = *admin;
	*admin = NULL;
}

/* Get how much time has already elapsed in the current cycle. */
static s32 get_cycle_time_elapsed(struct sched_gate_list *sched, ktime_t time)
{
	ktime_t time_since_sched_start;
	s32 time_elapsed;

	time_since_sched_start = ktime_sub(time, sched->base_time);
	div_s64_rem(time_since_sched_start, sched->cycle_time, &time_elapsed);

	return time_elapsed;
}

static ktime_t get_interval_end_time(struct sched_gate_list *sched,
				     struct sched_gate_list *admin,
				     struct sched_entry *entry,
				     ktime_t intv_start)
{
	s32 cycle_elapsed = get_cycle_time_elapsed(sched, intv_start);
	ktime_t intv_end, cycle_ext_end, cycle_end;

	cycle_end = ktime_add_ns(intv_start, sched->cycle_time - cycle_elapsed);
	intv_end = ktime_add_ns(intv_start, entry->interval);
	cycle_ext_end = ktime_add(cycle_end, sched->cycle_time_extension);

	if (ktime_before(intv_end, cycle_end))
		return intv_end;
	else if (admin && admin != sched &&
		 ktime_after(admin->base_time, cycle_end) &&
		 ktime_before(admin->base_time, cycle_ext_end))
		return admin->base_time;
	else
		return cycle_end;
}

static int length_to_duration(struct taprio_sched *q, int len)
{
	return div_u64(len * atomic64_read(&q->picos_per_byte), 1000);
}
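
/* Worked example for the conversion above (illustrative numbers, not part
 * of the original source): at 1 Gb/s, taprio_set_picos_per_byte() below
 * computes (USEC_PER_SEC * 8) / 1000 = 8000 ps per byte (one byte is
 * 8 bits, each taking 1 ns on the wire), so length_to_duration() maps a
 * 1500 byte frame to 1500 * 8000 / 1000 = 12000 ns of transmit time.
 */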

/* Returns the entry corresponding to next available interval. If
 * validate_interval is set, it only validates whether the timestamp occurs
 * when the gate corresponding to the skb's traffic class is open.
 */
static struct sched_entry *find_entry_to_transmit(struct sk_buff *skb,
						  struct Qdisc *sch,
						  struct sched_gate_list *sched,
						  struct sched_gate_list *admin,
						  ktime_t time,
						  ktime_t *interval_start,
						  ktime_t *interval_end,
						  bool validate_interval)
{
	ktime_t curr_intv_start, curr_intv_end, cycle_end, packet_transmit_time;
	ktime_t earliest_txtime = KTIME_MAX, txtime, cycle, transmit_end_time;
	struct sched_entry *entry = NULL, *entry_found = NULL;
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	bool entry_available = false;
	s32 cycle_elapsed;
	int tc, n;

	tc = netdev_get_prio_tc_map(dev, skb->priority);
	packet_transmit_time = length_to_duration(q, qdisc_pkt_len(skb));

	*interval_start = 0;
	*interval_end = 0;

	if (!sched)
		return NULL;

	cycle = sched->cycle_time;
	cycle_elapsed = get_cycle_time_elapsed(sched, time);
	curr_intv_end = ktime_sub_ns(time, cycle_elapsed);
	cycle_end = ktime_add_ns(curr_intv_end, cycle);

	list_for_each_entry(entry, &sched->entries, list) {
		curr_intv_start = curr_intv_end;
		curr_intv_end = get_interval_end_time(sched, admin, entry,
						      curr_intv_start);

		if (ktime_after(curr_intv_start, cycle_end))
			break;

		if (!(entry->gate_mask & BIT(tc)) ||
		    packet_transmit_time > entry->interval)
			continue;

		txtime = entry->next_txtime;

		if (ktime_before(txtime, time) || validate_interval) {
			transmit_end_time = ktime_add_ns(time, packet_transmit_time);
			if ((ktime_before(curr_intv_start, time) &&
			     ktime_before(transmit_end_time, curr_intv_end)) ||
			    (ktime_after(curr_intv_start, time) && !validate_interval)) {
				entry_found = entry;
				*interval_start = curr_intv_start;
				*interval_end = curr_intv_end;
				break;
			} else if (!entry_available && !validate_interval) {
				/* Here, we are just trying to find out the
				 * first available interval in the next cycle.
				 */
				entry_available = true;
				entry_found = entry;
				*interval_start = ktime_add_ns(curr_intv_start, cycle);
				*interval_end = ktime_add_ns(curr_intv_end, cycle);
			}
		} else if (ktime_before(txtime, earliest_txtime) &&
			   !entry_available) {
			earliest_txtime = txtime;
			entry_found = entry;
			n = div_s64(ktime_sub(txtime, curr_intv_start), cycle);
			*interval_start = ktime_add(curr_intv_start, n * cycle);
			*interval_end = ktime_add(curr_intv_end, n * cycle);
		}
	}

	return entry_found;
}

static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct sched_gate_list *sched, *admin;
	ktime_t interval_start, interval_end;
	struct sched_entry *entry;

	rcu_read_lock();
	sched = rcu_dereference(q->oper_sched);
	admin = rcu_dereference(q->admin_sched);

	entry = find_entry_to_transmit(skb, sch, sched, admin, skb->tstamp,
				       &interval_start, &interval_end, true);
	rcu_read_unlock();

	return entry;
}

static bool taprio_flags_valid(u32 flags)
{
	/* Make sure no other flag bits are set. */
	if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST |
		      TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
		return false;
	/* txtime-assist and full offload are mutually exclusive */
	if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) &&
	    (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD))
		return false;
	return true;
}

/* This returns the tstamp value set by TCP in terms of the set clock. */
static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb)
{
	unsigned int offset = skb_network_offset(skb);
	const struct ipv6hdr *ipv6h;
	const struct iphdr *iph;
	struct ipv6hdr _ipv6h;

	ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
	if (!ipv6h)
		return 0;

	if (ipv6h->version == 4) {
		iph = (struct iphdr *)ipv6h;
		offset += iph->ihl * 4;

		/* special-case 6in4 tunnelling, as that is a common way to get
		 * v6 connectivity in the home
		 */
		if (iph->protocol == IPPROTO_IPV6) {
			ipv6h = skb_header_pointer(skb, offset,
						   sizeof(_ipv6h), &_ipv6h);

			if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
				return 0;
		} else if (iph->protocol != IPPROTO_TCP) {
			return 0;
		}
	} else if (ipv6h->version == 6 && ipv6h->nexthdr != IPPROTO_TCP) {
		return 0;
	}

	return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset);
}

/* There are a few scenarios where we will have to modify the txtime from
 * what is read from next_txtime in sched_entry. They are:
 * 1. If txtime is in the past,
 *    a. The gate for the traffic class is currently open and packet can be
 *       transmitted before it closes, schedule the packet right away.
 *    b. If the gate corresponding to the traffic class is going to open later
 *       in the cycle, set the txtime of packet to the interval start.
 * 2. If txtime is in the future, there are packets corresponding to the
 *    current traffic class waiting to be transmitted. So, the following
 *    possibilities exist:
 *    a. We can transmit the packet before the window containing the txtime
 *       closes.
 *    b. The window might close before the transmission can be completed
 *       successfully. So, schedule the packet in the next open window.
 */
static long get_packet_txtime(struct sk_buff *skb, struct Qdisc *sch)
{
	ktime_t transmit_end_time, interval_end, interval_start, tcp_tstamp;
	struct taprio_sched *q = qdisc_priv(sch);
	struct sched_gate_list *sched, *admin;
	ktime_t minimum_time, now, txtime;
	int len, packet_transmit_time;
	struct sched_entry *entry;
	bool sched_changed;

	now = taprio_get_time(q);
	minimum_time = ktime_add_ns(now, q->txtime_delay);

	tcp_tstamp = get_tcp_tstamp(q, skb);
	minimum_time = max_t(ktime_t, minimum_time, tcp_tstamp);

	rcu_read_lock();
	admin = rcu_dereference(q->admin_sched);
	sched = rcu_dereference(q->oper_sched);
	if (admin && ktime_after(minimum_time, admin->base_time))
		switch_schedules(q, &admin, &sched);

	/* Until the schedule starts, all the queues are open */
	if (!sched || ktime_before(minimum_time, sched->base_time)) {
		txtime = minimum_time;
		goto done;
	}

	len = qdisc_pkt_len(skb);
	packet_transmit_time = length_to_duration(q, len);

	do {
		sched_changed = false;

		entry = find_entry_to_transmit(skb, sch, sched, admin,
					       minimum_time,
					       &interval_start, &interval_end,
					       false);
		if (!entry) {
			txtime = 0;
			goto done;
		}

		txtime = entry->next_txtime;
		txtime = max_t(ktime_t, txtime, minimum_time);
		txtime = max_t(ktime_t, txtime, interval_start);

		if (admin && admin != sched &&
		    ktime_after(txtime, admin->base_time)) {
			sched = admin;
			sched_changed = true;
			continue;
		}

		transmit_end_time = ktime_add(txtime, packet_transmit_time);
		minimum_time = transmit_end_time;

		/* Update the txtime of current entry to the next time its
		 * interval starts.
		 */
		if (ktime_after(transmit_end_time, interval_end))
			entry->next_txtime = ktime_add(interval_start, sched->cycle_time);
	} while (sched_changed || ktime_after(transmit_end_time, interval_end));

	entry->next_txtime = transmit_end_time;

done:
	rcu_read_unlock();
	return txtime;
}
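
/* Illustrative walk-through of the cases listed above (hypothetical
 * numbers, not from the original source): assume a 1 ms cycle where this
 * skb's traffic class has its gate open in [200us, 500us) of each cycle.
 * If next_txtime is in the past and "now" falls at 300us into the cycle,
 * case 1a applies and the packet is launched right away; at 100us,
 * case 1b defers it to the 200us interval start; and if next_txtime plus
 * the transmit duration would cross the 500us close, case 2b pushes it to
 * the matching interval of the following cycle.
 */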

static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			  struct sk_buff **to_free)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct Qdisc *child;
	int queue;

	queue = skb_get_queue_mapping(skb);

	child = q->qdiscs[queue];
	if (unlikely(!child))
		return qdisc_drop(skb, sch, to_free);

	if (skb->sk && sock_flag(skb->sk, SOCK_TXTIME)) {
		if (!is_valid_interval(skb, sch))
			return qdisc_drop(skb, sch, to_free);
	} else if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
		skb->tstamp = get_packet_txtime(skb, sch);
		if (!skb->tstamp)
			return qdisc_drop(skb, sch, to_free);
	}

	qdisc_qstats_backlog_inc(sch, skb);
	sch->q.qlen++;

	return qdisc_enqueue(skb, child, to_free);
}

static struct sk_buff *taprio_peek_soft(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_entry *entry;
	struct sk_buff *skb;
	u32 gate_mask;
	int i;

	rcu_read_lock();
	entry = rcu_dereference(q->current_entry);
	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
	rcu_read_unlock();

	if (!gate_mask)
		return NULL;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];
		int prio;
		u8 tc;

		if (unlikely(!child))
			continue;

		skb = child->ops->peek(child);
		if (!skb)
			continue;

		if (TXTIME_ASSIST_IS_ENABLED(q->flags))
			return skb;

		prio = skb->priority;
		tc = netdev_get_prio_tc_map(dev, prio);

		if (!(gate_mask & BIT(tc)))
			continue;

		return skb;
	}

	return NULL;
}

static struct sk_buff *taprio_peek_offload(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sk_buff *skb;
	int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];

		if (unlikely(!child))
			continue;

		skb = child->ops->peek(child);
		if (!skb)
			continue;

		return skb;
	}

	return NULL;
}

static struct sk_buff *taprio_peek(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);

	return q->peek(sch);
}

static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
{
	atomic_set(&entry->budget,
		   div64_u64((u64)entry->interval * 1000,
			     atomic64_read(&q->picos_per_byte)));
}
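
/* Worked example for the budget above (illustrative numbers): with a
 * 300000 ns (300 us) entry interval at 1 Gb/s, where picos_per_byte is
 * 8000, the budget is 300000 * 1000 / 8000 = 37500 bytes, i.e. exactly
 * 25 frames of 1500 bytes may be dequeued within this entry's window.
 */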

static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sk_buff *skb = NULL;
	struct sched_entry *entry;
	u32 gate_mask;
	int i;

	rcu_read_lock();
	entry = rcu_dereference(q->current_entry);
	/* if there's no entry, it means that the schedule didn't
	 * start yet, so force all gates to be open, this is in
	 * accordance with IEEE 802.1Qbv-2015 Section 8.6.9.4.5
	 * "AdminGateStates"
	 */
	gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;

	if (!gate_mask)
		goto done;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];
		ktime_t guard;
		int prio;
		int len;
		u8 tc;

		if (unlikely(!child))
			continue;

		if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
			skb = child->ops->dequeue(child);
			if (!skb)
				continue;
			goto skb_found;
		}

		skb = child->ops->peek(child);
		if (!skb)
			continue;

		prio = skb->priority;
		tc = netdev_get_prio_tc_map(dev, prio);

		if (!(gate_mask & BIT(tc))) {
			/* reset so we never return a peeked but
			 * not-dequeued skb from a previous iteration
			 */
			skb = NULL;
			continue;
		}

		len = qdisc_pkt_len(skb);
		guard = ktime_add_ns(taprio_get_time(q),
				     length_to_duration(q, len));

		/* In the case that there's no gate entry, there's no
		 * guard band ...
		 */
		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
		    ktime_after(guard, entry->close_time)) {
			skb = NULL;
			continue;
		}

		/* ... and no budget. */
		if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
		    atomic_sub_return(len, &entry->budget) < 0) {
			skb = NULL;
			continue;
		}

		skb = child->ops->dequeue(child);
		if (unlikely(!skb))
			goto done;

skb_found:
		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;

		goto done;
	}

done:
	rcu_read_unlock();

	return skb;
}

static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sk_buff *skb;
	int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct Qdisc *child = q->qdiscs[i];

		if (unlikely(!child))
			continue;

		skb = child->ops->dequeue(child);
		if (unlikely(!skb))
			continue;

		qdisc_bstats_update(sch, skb);
		qdisc_qstats_backlog_dec(sch, skb);
		sch->q.qlen--;

		return skb;
	}

	return NULL;
}

static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);

	return q->dequeue(sch);
}

static bool should_restart_cycle(const struct sched_gate_list *oper,
				 const struct sched_entry *entry)
{
	if (list_is_last(&entry->list, &oper->entries))
		return true;

	if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0)
		return true;

	return false;
}

static bool should_change_schedules(const struct sched_gate_list *admin,
				    const struct sched_gate_list *oper,
				    ktime_t close_time)
{
	ktime_t next_base_time, extension_time;

	if (!admin)
		return false;

	next_base_time = sched_base_time(admin);

	/* This is the simple case, the close_time would fall after
	 * the next schedule base_time.
	 */
	if (ktime_compare(next_base_time, close_time) <= 0)
		return true;

	/* This is the cycle_time_extension case, if the close_time
	 * plus the amount that can be extended would fall after the
	 * next schedule base_time, we can extend the current schedule
	 * for that amount.
	 */
	extension_time = ktime_add_ns(close_time, oper->cycle_time_extension);

	/* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
	 * how precisely the extension should be made. So after
	 * conformance testing, this logic may change.
	 */
	if (ktime_compare(next_base_time, extension_time) <= 0)
		return true;

	return false;
}

static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
	struct taprio_sched *q = container_of(timer, struct taprio_sched,
					      advance_timer);
	struct sched_gate_list *oper, *admin;
	struct sched_entry *entry, *next;
	struct Qdisc *sch = q->root;
	ktime_t close_time;

	spin_lock(&q->current_entry_lock);
	entry = rcu_dereference_protected(q->current_entry,
					  lockdep_is_held(&q->current_entry_lock));
	oper = rcu_dereference_protected(q->oper_sched,
					 lockdep_is_held(&q->current_entry_lock));
	admin = rcu_dereference_protected(q->admin_sched,
					  lockdep_is_held(&q->current_entry_lock));

	if (!oper)
		switch_schedules(q, &admin, &oper);

	/* This can happen in two cases: 1. this is the very first run
	 * of this function (i.e. we weren't running any schedule
	 * previously); 2. The previous schedule just ended. The first
	 * entry of all schedules are pre-calculated during the
	 * schedule initialization.
	 */
	if (unlikely(!entry || entry->close_time == oper->base_time)) {
		next = list_first_entry(&oper->entries, struct sched_entry,
					list);
		close_time = next->close_time;
		goto first_run;
	}

	if (should_restart_cycle(oper, entry)) {
		next = list_first_entry(&oper->entries, struct sched_entry,
					list);
		oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
						      oper->cycle_time);
	} else {
		next = list_next_entry(entry, list);
	}

	close_time = ktime_add_ns(entry->close_time, next->interval);
	close_time = min_t(ktime_t, close_time, oper->cycle_close_time);

	if (should_change_schedules(admin, oper, close_time)) {
		/* Set things so the next time this runs, the new
		 * schedule runs.
		 */
		close_time = sched_base_time(admin);
		switch_schedules(q, &admin, &oper);
	}

	next->close_time = close_time;
	taprio_set_budget(q, next);

first_run:
	rcu_assign_pointer(q->current_entry, next);
	spin_unlock(&q->current_entry_lock);

	hrtimer_set_expires(&q->advance_timer, close_time);

	rcu_read_lock();
	__netif_schedule(sch);
	rcu_read_unlock();

	return HRTIMER_RESTART;
}

static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
	[TCA_TAPRIO_SCHED_ENTRY_INDEX]	   = { .type = NLA_U32 },
	[TCA_TAPRIO_SCHED_ENTRY_CMD]	   = { .type = NLA_U8 },
	[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK] = { .type = NLA_U32 },
	[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]  = { .type = NLA_U32 },
};

static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
	[TCA_TAPRIO_ATTR_PRIOMAP]	       = {
		.len = sizeof(struct tc_mqprio_qopt)
	},
	[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]           = { .type = NLA_NESTED },
	[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]            = { .type = NLA_S64 },
	[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]         = { .type = NLA_NESTED },
	[TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           = { .type = NLA_S64 },
	[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
};
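
/* A representative configuration that exercises these attributes
 * (illustrative values; the interface name and times are examples):
 *
 *   tc qdisc replace dev eth0 parent root handle 100 taprio \
 *       num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 \
 *       queues 1@0 1@1 2@2 \
 *       base-time 1528743495910289987 \
 *       sched-entry S 01 300000 \
 *       sched-entry S 02 300000 \
 *       sched-entry S 04 400000 \
 *       clockid CLOCK_TAI
 *
 * Each "sched-entry S <gatemask> <interval-ns>" becomes one sched_entry,
 * parsed via taprio_policy/entry_policy by the functions below.
 */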

static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry,
			    struct netlink_ext_ack *extack)
{
	u32 interval = 0;

	if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD])
		entry->command = nla_get_u8(
			tb[TCA_TAPRIO_SCHED_ENTRY_CMD]);

	if (tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK])
		entry->gate_mask = nla_get_u32(
			tb[TCA_TAPRIO_SCHED_ENTRY_GATE_MASK]);

	if (tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL])
		interval = nla_get_u32(
			tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]);

	if (interval == 0) {
		NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
		return -EINVAL;
	}

	entry->interval = interval;

	return 0;
}

static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry,
			     int index, struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { };
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_SCHED_ENTRY_MAX, n,
					  entry_policy, NULL);
	if (err < 0) {
		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
		return -EINVAL;
	}

	entry->index = index;

	return fill_sched_entry(tb, entry, extack);
}

static int parse_sched_list(struct nlattr *list,
			    struct sched_gate_list *sched,
			    struct netlink_ext_ack *extack)
{
	struct nlattr *n;
	int err, rem;
	int i = 0;

	if (!list)
		return -EINVAL;

	nla_for_each_nested(n, list, rem) {
		struct sched_entry *entry;

		if (nla_type(n) != TCA_TAPRIO_SCHED_ENTRY) {
			NL_SET_ERR_MSG(extack, "Attribute is not of type 'entry'");
			continue;
		}

		entry = kzalloc(sizeof(*entry), GFP_KERNEL);
		if (!entry) {
			NL_SET_ERR_MSG(extack, "Not enough memory for entry");
			return -ENOMEM;
		}

		err = parse_sched_entry(n, entry, i, extack);
		if (err < 0) {
			kfree(entry);
			return err;
		}

		list_add_tail(&entry->list, &sched->entries);
		i++;
	}

	sched->num_entries = i;

	return i;
}

static int parse_taprio_schedule(struct nlattr **tb,
				 struct sched_gate_list *new,
				 struct netlink_ext_ack *extack)
{
	int err = 0;

	if (tb[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]) {
		NL_SET_ERR_MSG(extack, "Adding a single entry is not supported");
		return -ENOTSUPP;
	}

	if (tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME])
		new->base_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_BASE_TIME]);

	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION])
		new->cycle_time_extension = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION]);

	if (tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME])
		new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]);

	if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST])
		err = parse_sched_list(
			tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack);
	if (err < 0)
		return err;

	if (!new->cycle_time) {
		struct sched_entry *entry;
		ktime_t cycle = 0;

		list_for_each_entry(entry, &new->entries, list)
			cycle = ktime_add_ns(cycle, entry->interval);
		new->cycle_time = cycle;
	}

	return 0;
}

static int taprio_parse_mqprio_opt(struct net_device *dev,
				   struct tc_mqprio_qopt *qopt,
				   struct netlink_ext_ack *extack,
				   u32 taprio_flags)
{
	int i, j;

	if (!qopt && !dev->num_tc) {
		NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
		return -EINVAL;
	}

	/* If num_tc is already set, it means that the user already
	 * configured the mqprio part
	 */
	if (dev->num_tc)
		return 0;

	/* Verify num_tc is not out of max range */
	if (qopt->num_tc > TC_MAX_QUEUE) {
		NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
		return -EINVAL;
	}

	/* taprio imposes that traffic classes map 1:n to tx queues */
	if (qopt->num_tc > dev->num_tx_queues) {
		NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
		return -EINVAL;
	}

	/* Verify priority mapping uses valid tcs */
	for (i = 0; i <= TC_BITMASK; i++) {
		if (qopt->prio_tc_map[i] >= qopt->num_tc) {
			NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
			return -EINVAL;
		}
	}

	for (i = 0; i < qopt->num_tc; i++) {
		unsigned int last = qopt->offset[i] + qopt->count[i];

		/* Verify the queue count is in tx range; being equal to the
		 * real_num_tx_queues indicates the last queue is in use.
		 */
		if (qopt->offset[i] >= dev->num_tx_queues ||
		    !qopt->count[i] ||
		    last > dev->real_num_tx_queues) {
			NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
			return -EINVAL;
		}

		if (TXTIME_ASSIST_IS_ENABLED(taprio_flags))
			continue;

		/* Verify that the offset and counts do not overlap */
		for (j = i + 1; j < qopt->num_tc; j++) {
			if (last > qopt->offset[j]) {
				NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int taprio_get_start_time(struct Qdisc *sch,
				 struct sched_gate_list *sched,
				 ktime_t *start)
{
	struct taprio_sched *q = qdisc_priv(sch);
	ktime_t now, base, cycle;
	s64 n;

	base = sched_base_time(sched);
	now = taprio_get_time(q);

	if (ktime_after(base, now)) {
		*start = base;
		return 0;
	}

	cycle = sched->cycle_time;

	/* The qdisc is expected to have at least one sched_entry. Moreover,
	 * any entry must have 'interval' > 0. Thus if the cycle time is zero,
	 * something went really wrong. In that case, we should warn about this
	 * inconsistent state and return error.
	 */
	if (WARN_ON(!cycle))
		return -EFAULT;

	/* Schedule the start time for the beginning of the next
	 * cycle.
	 */
	n = div64_s64(ktime_sub_ns(now, base), cycle);
	*start = ktime_add_ns(base, (n + 1) * cycle);
	return 0;
}
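
/* Worked example (illustrative numbers): with base = 0 and cycle =
 * 1000000 ns, if "now" is 2500000 ns then n = 2 and the schedule is
 * started at base + 3 * cycle = 3000000 ns, i.e. at the beginning of the
 * next full cycle rather than somewhere mid-cycle.
 */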

static void setup_first_close_time(struct taprio_sched *q,
				   struct sched_gate_list *sched, ktime_t base)
{
	struct sched_entry *first;
	ktime_t cycle;

	first = list_first_entry(&sched->entries,
				 struct sched_entry, list);

	cycle = sched->cycle_time;

	/* FIXME: find a better place to do this */
	sched->cycle_close_time = ktime_add_ns(base, cycle);

	first->close_time = ktime_add_ns(base, first->interval);
	taprio_set_budget(q, first);
	rcu_assign_pointer(q->current_entry, NULL);
}

static void taprio_start_sched(struct Qdisc *sch,
			       ktime_t start, struct sched_gate_list *new)
{
	struct taprio_sched *q = qdisc_priv(sch);
	ktime_t expires;

	if (FULL_OFFLOAD_IS_ENABLED(q->flags))
		return;

	expires = hrtimer_get_expires(&q->advance_timer);
	if (expires == 0)
		expires = KTIME_MAX;

	/* If the new schedule starts before the next expiration, we
	 * reprogram it to the earliest one, so we change the admin
	 * schedule to the operational one at the right time.
	 */
	start = min_t(ktime_t, start, expires);

	hrtimer_start(&q->advance_timer, start, HRTIMER_MODE_ABS);
}

static void taprio_set_picos_per_byte(struct net_device *dev,
				      struct taprio_sched *q)
{
	struct ethtool_link_ksettings ecmd;
	int speed = SPEED_10;
	int picos_per_byte;
	int err;

	err = __ethtool_get_link_ksettings(dev, &ecmd);
	if (err < 0)
		goto skip;

	if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN)
		speed = ecmd.base.speed;

skip:
	picos_per_byte = (USEC_PER_SEC * 8) / speed;

	atomic64_set(&q->picos_per_byte, picos_per_byte);
	netdev_dbg(dev, "taprio: set %s's picos_per_byte to: %lld, linkspeed: %d\n",
		   dev->name, (long long)atomic64_read(&q->picos_per_byte),
		   ecmd.base.speed);
}

static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
			       void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net_device *qdev;
	struct taprio_sched *q;
	bool found = false;

	ASSERT_RTNL();

	if (event != NETDEV_UP && event != NETDEV_CHANGE)
		return NOTIFY_DONE;

	spin_lock(&taprio_list_lock);
	list_for_each_entry(q, &taprio_list, taprio_list) {
		qdev = qdisc_dev(q->root);
		if (qdev == dev) {
			found = true;
			break;
		}
	}
	spin_unlock(&taprio_list_lock);

	if (found)
		taprio_set_picos_per_byte(dev, q);

	return NOTIFY_DONE;
}

static void setup_txtime(struct taprio_sched *q,
			 struct sched_gate_list *sched, ktime_t base)
{
	struct sched_entry *entry;
	u32 interval = 0;

	list_for_each_entry(entry, &sched->entries, list) {
		entry->next_txtime = ktime_add_ns(base, interval);
		interval += entry->interval;
	}
}
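
/* Illustrative example for setup_txtime() (hypothetical values): with
 * three entries of 300000, 300000 and 400000 ns, the entries' first
 * next_txtime values become base + 0, base + 300000 and base + 600000,
 * staggering launch times across the 1000000 ns cycle.
 */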

static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries)
{
	size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries +
		      sizeof(struct __tc_taprio_qopt_offload);
	struct __tc_taprio_qopt_offload *__offload;

	__offload = kzalloc(size, GFP_KERNEL);
	if (!__offload)
		return NULL;

	refcount_set(&__offload->users, 1);

	return &__offload->offload;
}

struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload
						  *offload)
{
	struct __tc_taprio_qopt_offload *__offload;

	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
				 offload);

	refcount_inc(&__offload->users);

	return offload;
}
EXPORT_SYMBOL_GPL(taprio_offload_get);

void taprio_offload_free(struct tc_taprio_qopt_offload *offload)
{
	struct __tc_taprio_qopt_offload *__offload;

	__offload = container_of(offload, struct __tc_taprio_qopt_offload,
				 offload);

	if (!refcount_dec_and_test(&__offload->users))
		return;

	kfree(__offload);
}
EXPORT_SYMBOL_GPL(taprio_offload_free);
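
/* Sketch of the intended driver-side usage (hypothetical driver code, not
 * from this file): a driver that needs the schedule beyond the duration of
 * its ->ndo_setup_tc() call takes its own reference with
 * taprio_offload_get(offload) and later drops it with
 * taprio_offload_free(); the backing allocation is freed only when the
 * last user releases its reference.
 */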
1142 | ||
1143 | /* The function will only serve to keep the pointers to the "oper" and "admin" | |
1144 | * schedules valid in relation to their base times, so when calling dump() the | |
1145 | * users looks at the right schedules. | |
1146 | * When using full offload, the admin configuration is promoted to oper at the | |
1147 | * base_time in the PHC time domain. But because the system time is not | |
1148 | * necessarily in sync with that, we can't just trigger a hrtimer to call | |
1149 | * switch_schedules at the right hardware time. | |
1150 | * At the moment we call this by hand right away from taprio, but in the future | |
1151 | * it will be useful to create a mechanism for drivers to notify taprio of the | |
1152 | * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump(). | |
1153 | * This is left as TODO. | |
1154 | */ | |
d665c128 | 1155 | static void taprio_offload_config_changed(struct taprio_sched *q) |
9c66d156 VCG |
1156 | { |
1157 | struct sched_gate_list *oper, *admin; | |
1158 | ||
1159 | spin_lock(&q->current_entry_lock); | |
1160 | ||
1161 | oper = rcu_dereference_protected(q->oper_sched, | |
1162 | lockdep_is_held(&q->current_entry_lock)); | |
1163 | admin = rcu_dereference_protected(q->admin_sched, | |
1164 | lockdep_is_held(&q->current_entry_lock)); | |
1165 | ||
1166 | switch_schedules(q, &admin, &oper); | |
1167 | ||
1168 | spin_unlock(&q->current_entry_lock); | |
1169 | } | |
1170 | ||
1171 | static void taprio_sched_to_offload(struct taprio_sched *q, | |
1172 | struct sched_gate_list *sched, | |
1173 | const struct tc_mqprio_qopt *mqprio, | |
1174 | struct tc_taprio_qopt_offload *offload) | |
1175 | { | |
1176 | struct sched_entry *entry; | |
1177 | int i = 0; | |
1178 | ||
1179 | offload->base_time = sched->base_time; | |
1180 | offload->cycle_time = sched->cycle_time; | |
1181 | offload->cycle_time_extension = sched->cycle_time_extension; | |
1182 | ||
1183 | list_for_each_entry(entry, &sched->entries, list) { | |
1184 | struct tc_taprio_sched_entry *e = &offload->entries[i]; | |
1185 | ||
1186 | e->command = entry->command; | |
1187 | e->interval = entry->interval; | |
1188 | e->gate_mask = entry->gate_mask; | |
1189 | i++; | |
1190 | } | |
1191 | ||
1192 | offload->num_entries = i; | |
1193 | } | |
1194 | ||
1195 | static int taprio_enable_offload(struct net_device *dev, | |
1196 | struct tc_mqprio_qopt *mqprio, | |
1197 | struct taprio_sched *q, | |
1198 | struct sched_gate_list *sched, | |
1199 | struct netlink_ext_ack *extack) | |
1200 | { | |
1201 | const struct net_device_ops *ops = dev->netdev_ops; | |
1202 | struct tc_taprio_qopt_offload *offload; | |
1203 | int err = 0; | |
1204 | ||
1205 | if (!ops->ndo_setup_tc) { | |
1206 | NL_SET_ERR_MSG(extack, | |
1207 | "Device does not support taprio offload"); | |
1208 | return -EOPNOTSUPP; | |
1209 | } | |
1210 | ||
1211 | offload = taprio_offload_alloc(sched->num_entries); | |
1212 | if (!offload) { | |
1213 | NL_SET_ERR_MSG(extack, | |
1214 | "Not enough memory for enabling offload mode"); | |
1215 | return -ENOMEM; | |
1216 | } | |
1217 | offload->enable = 1; | |
1218 | taprio_sched_to_offload(q, sched, mqprio, offload); | |
1219 | ||
1220 | err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); | |
1221 | if (err < 0) { | |
1222 | NL_SET_ERR_MSG(extack, | |
1223 | "Device failed to setup taprio offload"); | |
1224 | goto done; | |
1225 | } | |
1226 | ||
9c66d156 VCG |
1227 | done: |
1228 | taprio_offload_free(offload); | |
1229 | ||
1230 | return err; | |
1231 | } | |
1232 | ||
1233 | static int taprio_disable_offload(struct net_device *dev, | |
1234 | struct taprio_sched *q, | |
1235 | struct netlink_ext_ack *extack) | |
1236 | { | |
1237 | const struct net_device_ops *ops = dev->netdev_ops; | |
1238 | struct tc_taprio_qopt_offload *offload; | |
1239 | int err; | |
1240 | ||
1241 | if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) | |
1242 | return 0; | |
1243 | ||
1244 | if (!ops->ndo_setup_tc) | |
1245 | return -EOPNOTSUPP; | |
1246 | ||
1247 | offload = taprio_offload_alloc(0); | |
1248 | if (!offload) { | |
1249 | NL_SET_ERR_MSG(extack, | |
1250 | "Not enough memory to disable offload mode"); | |
1251 | return -ENOMEM; | |
1252 | } | |
1253 | offload->enable = 0; | |
1254 | ||
1255 | err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); | |
1256 | if (err < 0) { | |
1257 | NL_SET_ERR_MSG(extack, | |
1258 | "Device failed to disable offload"); | |
1259 | goto out; | |
1260 | } | |
1261 | ||
1262 | out: | |
1263 | taprio_offload_free(offload); | |
1264 | ||
1265 | return err; | |
1266 | } | |
1267 | ||
1268 | /* If full offload is enabled, the only possible clockid is the net device's | |
1269 | * PHC. For that reason, specifying a clockid through netlink is incorrect. | |
1270 | * For txtime-assist, it is implicitly assumed that the device's PHC is kept | |
1271 | * in sync with the specified clockid via a user space daemon such as phc2sys. | |
1272 | * For both software taprio and txtime-assist, the clockid is used for the | |
1273 | * hrtimer that advances the schedule and hence mandatory. | |
1274 | */ | |
1275 | static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, | |
1276 | struct netlink_ext_ack *extack) | |
1277 | { | |
1278 | struct taprio_sched *q = qdisc_priv(sch); | |
1279 | struct net_device *dev = qdisc_dev(sch); | |
1280 | int err = -EINVAL; | |
1281 | ||
1282 | if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { | |
1283 | const struct ethtool_ops *ops = dev->ethtool_ops; | |
1284 | struct ethtool_ts_info info = { | |
1285 | .cmd = ETHTOOL_GET_TS_INFO, | |
1286 | .phc_index = -1, | |
1287 | }; | |
1288 | ||
1289 | if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { | |
1290 | NL_SET_ERR_MSG(extack, | |
1291 | "The 'clockid' cannot be specified for full offload"); | |
1292 | goto out; | |
1293 | } | |
1294 | ||
1295 | if (ops && ops->get_ts_info) | |
1296 | err = ops->get_ts_info(dev, &info); | |
1297 | ||
1298 | if (err || info.phc_index < 0) { | |
1299 | NL_SET_ERR_MSG(extack, | |
1300 | "Device does not have a PTP clock"); | |
1301 | err = -ENOTSUPP; | |
1302 | goto out; | |
1303 | } | |
1304 | } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { | |
1305 | int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); | |
1306 | ||
1307 | /* We only support static clockids and we don't allow | |
1308 | * for it to be modified after the first init. | |
1309 | */ | |
1310 | if (clockid < 0 || | |
1311 | (q->clockid != -1 && q->clockid != clockid)) { | |
1312 | NL_SET_ERR_MSG(extack, | |
1313 | "Changing the 'clockid' of a running schedule is not supported"); | |
1314 | err = -ENOTSUPP; | |
1315 | goto out; | |
1316 | } | |
1317 | ||
1318 | switch (clockid) { | |
1319 | case CLOCK_REALTIME: | |
1320 | q->tk_offset = TK_OFFS_REAL; | |
1321 | break; | |
1322 | case CLOCK_MONOTONIC: | |
1323 | q->tk_offset = TK_OFFS_MAX; | |
1324 | break; | |
1325 | case CLOCK_BOOTTIME: | |
1326 | q->tk_offset = TK_OFFS_BOOT; | |
1327 | break; | |
1328 | case CLOCK_TAI: | |
1329 | q->tk_offset = TK_OFFS_TAI; | |
1330 | break; | |
1331 | default: | |
1332 | NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); | |
1333 | err = -EINVAL; | |
1334 | goto out; | |
1335 | } | |
1336 | ||
1337 | q->clockid = clockid; | |
1338 | } else { | |
1339 | NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); | |
1340 | goto out; | |
1341 | } | |
a954380a VCG |
1342 | |
1343 | /* Everything went ok, return success. */ | |
1344 | err = 0; | |
1345 | ||
9c66d156 VCG |
1346 | out: |
1347 | return err; | |
1348 | } | |
1349 | ||
b5a0faa3 IK |
1350 | static int taprio_mqprio_cmp(const struct net_device *dev, |
1351 | const struct tc_mqprio_qopt *mqprio) | |
1352 | { | |
1353 | int i; | |
1354 | ||
1355 | if (!mqprio || mqprio->num_tc != dev->num_tc) | |
1356 | return -1; | |
1357 | ||
1358 | for (i = 0; i < mqprio->num_tc; i++) | |
1359 | if (dev->tc_to_txq[i].count != mqprio->count[i] || | |
1360 | dev->tc_to_txq[i].offset != mqprio->offset[i]) | |
1361 | return -1; | |
1362 | ||
1363 | for (i = 0; i <= TC_BITMASK; i++) | |
1364 | if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i]) | |
1365 | return -1; | |
1366 | ||
1367 | return 0; | |
1368 | } | |
1369 | ||
5a781ccb VCG |
1370 | static int taprio_change(struct Qdisc *sch, struct nlattr *opt, |
1371 | struct netlink_ext_ack *extack) | |
1372 | { | |
1373 | struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { }; | |
a3d43c0d | 1374 | struct sched_gate_list *oper, *admin, *new_admin; |
5a781ccb VCG |
1375 | struct taprio_sched *q = qdisc_priv(sch); |
1376 | struct net_device *dev = qdisc_dev(sch); | |
1377 | struct tc_mqprio_qopt *mqprio = NULL; | |
4cfd5779 | 1378 | u32 taprio_flags = 0; |
a3d43c0d | 1379 | unsigned long flags; |
5a781ccb | 1380 | ktime_t start; |
9c66d156 | 1381 | int i, err; |
5a781ccb | 1382 | |
8cb08174 JB |
1383 | err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt, |
1384 | taprio_policy, extack); | |
5a781ccb VCG |
1385 | if (err < 0) |
1386 | return err; | |
1387 | ||
5a781ccb VCG |
1388 | if (tb[TCA_TAPRIO_ATTR_PRIOMAP]) |
1389 | mqprio = nla_data(tb[TCA_TAPRIO_ATTR_PRIOMAP]); | |
1390 | ||
4cfd5779 VP |
1391 | if (tb[TCA_TAPRIO_ATTR_FLAGS]) { |
1392 | taprio_flags = nla_get_u32(tb[TCA_TAPRIO_ATTR_FLAGS]); | |
1393 | ||
1394 | if (q->flags != 0 && q->flags != taprio_flags) { | |
1395 | NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); | |
1396 | return -EOPNOTSUPP; | |
9c66d156 | 1397 | } else if (!taprio_flags_valid(taprio_flags)) { |
4cfd5779 VP |
1398 | NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); |
1399 | return -EINVAL; | |
1400 | } | |
1401 | ||
1402 | q->flags = taprio_flags; | |
1403 | } | |
1404 | ||
1405 | err = taprio_parse_mqprio_opt(dev, mqprio, extack, taprio_flags); | |
5a781ccb VCG |
1406 | if (err < 0) |
1407 | return err; | |
1408 | ||
a3d43c0d VCG |
1409 | new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL); |
1410 | if (!new_admin) { | |
1411 | NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule"); | |
1412 | return -ENOMEM; | |
1413 | } | |
1414 | INIT_LIST_HEAD(&new_admin->entries); | |
5a781ccb | 1415 | |
a3d43c0d VCG |
1416 | rcu_read_lock(); |
1417 | oper = rcu_dereference(q->oper_sched); | |
1418 | admin = rcu_dereference(q->admin_sched); | |
1419 | rcu_read_unlock(); | |
5a781ccb | 1420 | |
b5a0faa3 IK |
1421 | /* no changes - no new mqprio settings */ |
1422 | if (!taprio_mqprio_cmp(dev, mqprio)) | |
1423 | mqprio = NULL; | |
1424 | ||
a3d43c0d VCG |
1425 | if (mqprio && (oper || admin)) { |
1426 | NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported"); | |
1427 | err = -ENOTSUPP; | |
1428 | goto free_sched; | |
5a781ccb VCG |
1429 | } |
1430 | ||
a3d43c0d VCG |
1431 | err = parse_taprio_schedule(tb, new_admin, extack); |
1432 | if (err < 0) | |
1433 | goto free_sched; | |
5a781ccb | 1434 | |
a3d43c0d VCG |
1435 | if (new_admin->num_entries == 0) { |
1436 | NL_SET_ERR_MSG(extack, "There should be at least one entry in the schedule"); | |
1437 | err = -EINVAL; | |
1438 | goto free_sched; | |
1439 | } | |
5a781ccb | 1440 | |
9c66d156 VCG |
1441 | err = taprio_parse_clockid(sch, tb, extack); |
1442 | if (err < 0) | |
a3d43c0d | 1443 | goto free_sched; |
a3d43c0d VCG |
1444 | |
1445 | taprio_set_picos_per_byte(dev, q); | |
1446 | ||
9c66d156 VCG |
1447 | if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) |
1448 | err = taprio_enable_offload(dev, mqprio, q, new_admin, extack); | |
1449 | else | |
1450 | err = taprio_disable_offload(dev, q, extack); | |
1451 | if (err) | |
1452 | goto free_sched; | |
1453 | ||
a3d43c0d VCG |
1454 | /* Protects against enqueue()/dequeue() */ |
1455 | spin_lock_bh(qdisc_lock(sch)); | |
1456 | ||
4cfd5779 VP |
1457 | if (tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]) { |
1458 | if (!TXTIME_ASSIST_IS_ENABLED(q->flags)) { | |
1459 | NL_SET_ERR_MSG_MOD(extack, "txtime-delay can only be set when txtime-assist mode is enabled"); | |
1460 | err = -EINVAL; | |
1461 | goto unlock; | |
1462 | } | |
1463 | ||
a5b64700 | 1464 | q->txtime_delay = nla_get_u32(tb[TCA_TAPRIO_ATTR_TXTIME_DELAY]); |
4cfd5779 VP |
1465 | } |
1466 | ||
1467 | if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) && | |
9c66d156 | 1468 | !FULL_OFFLOAD_IS_ENABLED(taprio_flags) && |
4cfd5779 | 1469 | !hrtimer_active(&q->advance_timer)) { |
a3d43c0d VCG |
1470 | hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); |
1471 | q->advance_timer.function = advance_sched; | |
5a781ccb VCG |
1472 | } |
1473 | ||
1474 | if (mqprio) { | |
1475 | netdev_set_num_tc(dev, mqprio->num_tc); | |
1476 | for (i = 0; i < mqprio->num_tc; i++) | |
1477 | netdev_set_tc_queue(dev, i, | |
1478 | mqprio->count[i], | |
1479 | mqprio->offset[i]); | |
1480 | ||
1481 | /* Always use supplied priority mappings */ | |
b5a0faa3 | 1482 | for (i = 0; i <= TC_BITMASK; i++) |
5a781ccb VCG |
1483 | netdev_set_prio_tc_map(dev, i, |
1484 | mqprio->prio_tc_map[i]); | |
1485 | } | |
1486 | ||
9c66d156 VCG |
1487 | if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) { |
1488 | q->dequeue = taprio_dequeue_offload; | |
1489 | q->peek = taprio_peek_offload; | |
1490 | } else { | |
1491 | /* Be sure to always keep the function pointers | |
1492 | * in a consistent state. | |
1493 | */ | |
1494 | q->dequeue = taprio_dequeue_soft; | |
1495 | q->peek = taprio_peek_soft; | |
a3d43c0d | 1496 | } |
8599099f | 1497 | |
a3d43c0d | 1498 | err = taprio_get_start_time(sch, new_admin, &start); |
8599099f AG |
1499 | if (err < 0) { |
1500 | NL_SET_ERR_MSG(extack, "Internal error: failed get start time"); | |
a3d43c0d | 1501 | goto unlock; |
8599099f | 1502 | } |
5a781ccb | 1503 | |
4cfd5779 VP |
1504 | if (TXTIME_ASSIST_IS_ENABLED(taprio_flags)) { |
1505 | setup_txtime(q, new_admin, start); | |
5a781ccb | 1506 | |
4cfd5779 VP |
1507 | if (!oper) { |
1508 | rcu_assign_pointer(q->oper_sched, new_admin); | |
1509 | err = 0; | |
1510 | new_admin = NULL; | |
1511 | goto unlock; | |
1512 | } | |
a3d43c0d | 1513 | |
4cfd5779 VP |
1514 | rcu_assign_pointer(q->admin_sched, new_admin); |
1515 | if (admin) | |
1516 | call_rcu(&admin->rcu, taprio_free_sched_cb); | |
1517 | } else { | |
1518 | setup_first_close_time(q, new_admin, start); | |
a3d43c0d | 1519 | |
4cfd5779 VP |
1520 | /* Protects against advance_sched() */ |
1521 | spin_lock_irqsave(&q->current_entry_lock, flags); | |
1522 | ||
1523 | taprio_start_sched(sch, start, new_admin); | |
a3d43c0d | 1524 | |
4cfd5779 VP |
1525 | rcu_assign_pointer(q->admin_sched, new_admin); |
1526 | if (admin) | |
1527 | call_rcu(&admin->rcu, taprio_free_sched_cb); | |
a3d43c0d | 1528 | |
4cfd5779 | 1529 | spin_unlock_irqrestore(&q->current_entry_lock, flags); |
0763b3e8 IK |
1530 | |
1531 | if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) | |
1532 | taprio_offload_config_changed(q); | |
4cfd5779 VP |
1533 | } |
1534 | ||
1535 | new_admin = NULL; | |
a3d43c0d VCG |
1536 | err = 0; |
1537 | ||
1538 | unlock: | |
1539 | spin_unlock_bh(qdisc_lock(sch)); | |
1540 | ||
1541 | free_sched: | |
51650d33 IK |
1542 | if (new_admin) |
1543 | call_rcu(&new_admin->rcu, taprio_free_sched_cb); | |
a3d43c0d VCG |
1544 | |
1545 | return err; | |
5a781ccb VCG |
1546 | } |
1547 | ||
static void taprio_destroy(struct Qdisc *sch)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	unsigned int i;

	spin_lock(&taprio_list_lock);
	list_del(&q->taprio_list);
	spin_unlock(&taprio_list_lock);

	hrtimer_cancel(&q->advance_timer);

	taprio_disable_offload(dev, q, NULL);

	if (q->qdiscs) {
		for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++)
			qdisc_put(q->qdiscs[i]);

		kfree(q->qdiscs);
	}
	q->qdiscs = NULL;

	netdev_set_num_tc(dev, 0);

	if (q->oper_sched)
		call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb);

	if (q->admin_sched)
		call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb);
}

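/* Set up per-instance state: the schedule advance timer, the default
 * software dequeue/peek handlers, and one pfifo child qdisc per TX
 * queue. The actual schedule is parsed later, in taprio_change().
 */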
static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
		       struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	int i;

	spin_lock_init(&q->current_entry_lock);

	hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS);
	q->advance_timer.function = advance_sched;

	q->dequeue = taprio_dequeue_soft;
	q->peek = taprio_peek_soft;

	q->root = sch;

	/* We only support static clockids. Use an invalid value as default
	 * and get the valid one on taprio_change().
	 */
	q->clockid = -1;

	spin_lock(&taprio_list_lock);
	list_add(&q->taprio_list, &taprio_list);
	spin_unlock(&taprio_list_lock);

	if (sch->parent != TC_H_ROOT)
		return -EOPNOTSUPP;

	if (!netif_is_multiqueue(dev))
		return -EOPNOTSUPP;

	/* pre-allocate qdisc, attachment can't fail */
	q->qdiscs = kcalloc(dev->num_tx_queues,
			    sizeof(q->qdiscs[0]),
			    GFP_KERNEL);

	if (!q->qdiscs)
		return -ENOMEM;

	if (!opt)
		return -EINVAL;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		struct Qdisc *qdisc;

		dev_queue = netdev_get_tx_queue(dev, i);
		qdisc = qdisc_create_dflt(dev_queue,
					  &pfifo_qdisc_ops,
					  TC_H_MAKE(TC_H_MAJ(sch->handle),
						    TC_H_MIN(i + 1)),
					  extack);
		if (!qdisc)
			return -ENOMEM;

		if (i < dev->real_num_tx_queues)
			qdisc_hash_add(qdisc, false);

		q->qdiscs[i] = qdisc;
	}

	return taprio_change(sch, opt, extack);
}

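/* The class id 'cl' is the 1-based TX queue number; map it to the
 * matching netdev_queue, or NULL when it is out of range.
 */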
static struct netdev_queue *taprio_queue_get(struct Qdisc *sch,
					     unsigned long cl)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned long ntx = cl - 1;

	if (ntx >= dev->num_tx_queues)
		return NULL;

	return netdev_get_tx_queue(dev, ntx);
}

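/* Swap the child qdisc of one TX queue. The device is deactivated
 * around the swap so the old child is idle when it is handed back.
 */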
static int taprio_graft(struct Qdisc *sch, unsigned long cl,
			struct Qdisc *new, struct Qdisc **old,
			struct netlink_ext_ack *extack)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	if (!dev_queue)
		return -EINVAL;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	*old = q->qdiscs[cl - 1];
	q->qdiscs[cl - 1] = new;

	if (new)
		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return 0;
}

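/* Emit a single schedule entry (index, command, gate mask, interval)
 * as a nested TCA_TAPRIO_SCHED_ENTRY attribute.
 */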
static int dump_entry(struct sk_buff *msg,
		      const struct sched_entry *entry)
{
	struct nlattr *item;

	item = nla_nest_start_noflag(msg, TCA_TAPRIO_SCHED_ENTRY);
	if (!item)
		return -ENOSPC;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INDEX, entry->index))
		goto nla_put_failure;

	if (nla_put_u8(msg, TCA_TAPRIO_SCHED_ENTRY_CMD, entry->command))
		goto nla_put_failure;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_GATE_MASK,
			entry->gate_mask))
		goto nla_put_failure;

	if (nla_put_u32(msg, TCA_TAPRIO_SCHED_ENTRY_INTERVAL,
			entry->interval))
		goto nla_put_failure;

	return nla_nest_end(msg, item);

nla_put_failure:
	nla_nest_cancel(msg, item);
	return -1;
}

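/* Dump one whole schedule: base time, cycle time and its extension,
 * followed by the nested list of gate entries.
 */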
static int dump_schedule(struct sk_buff *msg,
			 const struct sched_gate_list *root)
{
	struct nlattr *entry_list;
	struct sched_entry *entry;

	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_BASE_TIME,
			root->base_time, TCA_TAPRIO_PAD))
		return -1;

	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME,
			root->cycle_time, TCA_TAPRIO_PAD))
		return -1;

	if (nla_put_s64(msg, TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION,
			root->cycle_time_extension, TCA_TAPRIO_PAD))
		return -1;

	entry_list = nla_nest_start_noflag(msg,
					   TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST);
	if (!entry_list)
		goto error_nest;

	list_for_each_entry(entry, &root->entries, list) {
		if (dump_entry(msg, entry) < 0)
			goto error_nest;
	}

	nla_nest_end(msg, entry_list);
	return 0;

error_nest:
	nla_nest_cancel(msg, entry_list);
	return -1;
}

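/* Dump the complete qdisc state: the priority map, clockid and flags,
 * the operational schedule and, if one is pending, the admin schedule
 * nested under TCA_TAPRIO_ATTR_ADMIN_SCHED.
 */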
static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct taprio_sched *q = qdisc_priv(sch);
	struct net_device *dev = qdisc_dev(sch);
	struct sched_gate_list *oper, *admin;
	struct tc_mqprio_qopt opt = { 0 };
	struct nlattr *nest, *sched_nest;
	unsigned int i;

	rcu_read_lock();
	oper = rcu_dereference(q->oper_sched);
	admin = rcu_dereference(q->admin_sched);

	opt.num_tc = netdev_get_num_tc(dev);
	memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));

	for (i = 0; i < netdev_get_num_tc(dev); i++) {
		opt.count[i] = dev->tc_to_txq[i].count;
		opt.offset[i] = dev->tc_to_txq[i].offset;
	}

	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
	if (!nest)
		goto start_error;

	if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt))
		goto options_error;

	if (!FULL_OFFLOAD_IS_ENABLED(q->flags) &&
	    nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid))
		goto options_error;

	if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags))
		goto options_error;

	if (q->txtime_delay &&
	    nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
		goto options_error;

	if (oper && dump_schedule(skb, oper))
		goto options_error;

	if (!admin)
		goto done;

	sched_nest = nla_nest_start_noflag(skb, TCA_TAPRIO_ATTR_ADMIN_SCHED);
	if (!sched_nest)
		goto options_error;

	if (dump_schedule(skb, admin))
		goto admin_error;

	nla_nest_end(skb, sched_nest);

done:
	rcu_read_unlock();

	return nla_nest_end(skb, nest);

admin_error:
	nla_nest_cancel(skb, sched_nest);

options_error:
	nla_nest_cancel(skb, nest);

start_error:
	rcu_read_unlock();
	return -ENOSPC;
}

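/* The class interface below exposes one class per TX queue, following
 * the convention used by mq/mqprio: class N maps to queue N - 1.
 */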
static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl)
{
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	if (!dev_queue)
		return NULL;

	return dev_queue->qdisc_sleeping;
}

static unsigned long taprio_find(struct Qdisc *sch, u32 classid)
{
	unsigned int ntx = TC_H_MIN(classid);

	if (!taprio_queue_get(sch, ntx))
		return 0;
	return ntx;
}

static int taprio_dump_class(struct Qdisc *sch, unsigned long cl,
			     struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	tcm->tcm_parent = TC_H_ROOT;
	tcm->tcm_handle |= TC_H_MIN(cl);
	tcm->tcm_info = dev_queue->qdisc_sleeping->handle;

	return 0;
}

static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
				   struct gnet_dump *d)
	__releases(d->lock)
	__acquires(d->lock)
{
	struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);

	sch = dev_queue->qdisc_sleeping;
	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
	    qdisc_qstats_copy(d, sch) < 0)
		return -1;
	return 0;
}

static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
	struct net_device *dev = qdisc_dev(sch);
	unsigned long ntx;

	if (arg->stop)
		return;

	arg->count = arg->skip;
	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
		if (arg->fn(sch, ntx + 1, arg) < 0) {
			arg->stop = 1;
			break;
		}
		arg->count++;
	}
}

static struct netdev_queue *taprio_select_queue(struct Qdisc *sch,
						struct tcmsg *tcm)
{
	return taprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent));
}

static const struct Qdisc_class_ops taprio_class_ops = {
	.graft = taprio_graft,
	.leaf = taprio_leaf,
	.find = taprio_find,
	.walk = taprio_walk,
	.dump = taprio_dump_class,
	.dump_stats = taprio_dump_class_stats,
	.select_queue = taprio_select_queue,
};

static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
	.cl_ops = &taprio_class_ops,
	.id = "taprio",
	.priv_size = sizeof(struct taprio_sched),
	.init = taprio_init,
	.change = taprio_change,
	.destroy = taprio_destroy,
	.peek = taprio_peek,
	.dequeue = taprio_dequeue,
	.enqueue = taprio_enqueue,
	.dump = taprio_dump,
	.owner = THIS_MODULE,
};

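/* Let taprio instances react to device events such as link speed
 * changes (see taprio_dev_notifier()).
 */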
static struct notifier_block taprio_device_notifier = {
	.notifier_call = taprio_dev_notifier,
};

static int __init taprio_module_init(void)
{
	int err = register_netdevice_notifier(&taprio_device_notifier);

	if (err)
		return err;

	return register_qdisc(&taprio_qdisc_ops);
}

static void __exit taprio_module_exit(void)
{
	unregister_qdisc(&taprio_qdisc_ops);
	unregister_netdevice_notifier(&taprio_device_notifier);
}

module_init(taprio_module_init);
module_exit(taprio_module_exit);
MODULE_LICENSE("GPL");