Commit | Line | Data |
---|---|---|
046f6fd5 THJ |
1 | // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause |
2 | ||
3 | /* COMMON Applications Kept Enhanced (CAKE) discipline | |
4 | * | |
5 | * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com> | |
6 | * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <toke@toke.dk> | |
7 | * Copyright (C) 2014-2018 Dave Täht <dave.taht@gmail.com> | |
8 | * Copyright (C) 2015-2018 Sebastian Moeller <moeller0@gmx.de> | |
9 | * (C) 2015-2018 Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk> | |
10 | * Copyright (C) 2017-2018 Ryan Mounce <ryan@mounce.com.au> | |
11 | * | |
12 | * The CAKE Principles: | |
13 | * (or, how to have your cake and eat it too) | |
14 | * | |
15 | * This is a combination of several shaping, AQM and FQ techniques into one | |
16 | * easy-to-use package: | |
17 | * | |
18 | * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE | |
19 | * equipment and bloated MACs. This operates in deficit mode (as in sch_fq), | |
20 | * eliminating the need for any sort of burst parameter (eg. token bucket | |
21 | * depth). Burst support is limited to that necessary to overcome scheduling | |
22 | * latency. | |
23 | * | |
24 | * - A Diffserv-aware priority queue, giving more priority to certain classes, | |
25 | * up to a specified fraction of bandwidth. Above that bandwidth threshold, | |
26 | * the priority is reduced to avoid starving other tins. | |
27 | * | |
28 | * - Each priority tin has a separate Flow Queue system, to isolate traffic | |
29 | * flows from each other. This prevents a burst on one flow from increasing | |
30 | * the delay to another. Flows are distributed to queues using a | |
31 | * set-associative hash function. | |
32 | * | |
33 | * - Each queue is actively managed by Cobalt, which is a combination of the | |
34 | * Codel and Blue AQM algorithms. This serves flows fairly, and signals | |
35 | * congestion early via ECN (if available) and/or packet drops, to keep | |
36 | * latency low. The codel parameters are auto-tuned based on the bandwidth | |
37 | * setting, as is necessary at low bandwidths. | |
38 | * | |
39 | * The configuration parameters are kept deliberately simple for ease of use. | |
40 | * Everything has sane defaults. Complete generality of configuration is *not* | |
41 | * a goal. | |
42 | * | |
43 | * The priority queue operates according to a weighted DRR scheme, combined with | |
44 | * a bandwidth tracker which reuses the shaper logic to detect which side of the | |
45 | * bandwidth sharing threshold the tin is operating. This determines whether a | |
46 | * priority-based weight (high) or a bandwidth-based weight (low) is used for | |
47 | * that tin in the current pass. | |
48 | * | |
49 | * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly | |
50 | * granted us permission to leverage. | |
51 | */ | |
52 | ||
53 | #include <linux/module.h> | |
54 | #include <linux/types.h> | |
55 | #include <linux/kernel.h> | |
56 | #include <linux/jiffies.h> | |
57 | #include <linux/string.h> | |
58 | #include <linux/in.h> | |
59 | #include <linux/errno.h> | |
60 | #include <linux/init.h> | |
61 | #include <linux/skbuff.h> | |
62 | #include <linux/jhash.h> | |
63 | #include <linux/slab.h> | |
64 | #include <linux/vmalloc.h> | |
65 | #include <linux/reciprocal_div.h> | |
66 | #include <net/netlink.h> | |
046f6fd5 THJ |
67 | #include <linux/if_vlan.h> |
68 | #include <net/pkt_sched.h> | |
69 | #include <net/pkt_cls.h> | |
70 | #include <net/tcp.h> | |
71 | #include <net/flow_dissector.h> | |
72 | ||
ea825115 THJ |
73 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) |
74 | #include <net/netfilter/nf_conntrack_core.h> | |
75 | #endif | |
76 | ||
046f6fd5 THJ |
77 | #define CAKE_SET_WAYS (8) |
78 | #define CAKE_MAX_TINS (8) | |
79 | #define CAKE_QUEUES (1024) | |
80 | #define CAKE_FLOW_MASK 63 | |
81 | #define CAKE_FLOW_NAT_FLAG 64 | |
82 | ||
83 | /* struct cobalt_params - contains codel and blue parameters | |
84 | * @interval: codel initial drop rate | |
85 | * @target: maximum persistent sojourn time & blue update rate | |
86 | * @mtu_time: serialisation delay of maximum-size packet | |
87 | * @p_inc: increment of blue drop probability (0.32 fxp) | |
88 | * @p_dec: decrement of blue drop probability (0.32 fxp) | |
89 | */ | |
90 | struct cobalt_params { | |
91 | u64 interval; | |
92 | u64 target; | |
93 | u64 mtu_time; | |
94 | u32 p_inc; | |
95 | u32 p_dec; | |
96 | }; | |
97 | ||
98 | /* struct cobalt_vars - contains codel and blue variables | |
99 | * @count: codel dropping frequency | |
100 | * @rec_inv_sqrt: reciprocal value of sqrt(count) >> 1 | |
101 | * @drop_next: time to drop next packet, or when we dropped last | |
102 | * @blue_timer: Blue time to next drop | |
103 | * @p_drop: BLUE drop probability (0.32 fxp) | |
104 | * @dropping: set if in dropping state | |
105 | * @ecn_marked: set if marked | |
106 | */ | |
107 | struct cobalt_vars { | |
108 | u32 count; | |
109 | u32 rec_inv_sqrt; | |
110 | ktime_t drop_next; | |
111 | ktime_t blue_timer; | |
112 | u32 p_drop; | |
113 | bool dropping; | |
114 | bool ecn_marked; | |
115 | }; | |
116 | ||
117 | enum { | |
118 | CAKE_SET_NONE = 0, | |
119 | CAKE_SET_SPARSE, | |
120 | CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */ | |
121 | CAKE_SET_BULK, | |
122 | CAKE_SET_DECAYING | |
123 | }; | |
124 | ||
125 | struct cake_flow { | |
126 | /* this stuff is all needed per-flow at dequeue time */ | |
127 | struct sk_buff *head; | |
128 | struct sk_buff *tail; | |
129 | struct list_head flowchain; | |
130 | s32 deficit; | |
131 | u32 dropped; | |
132 | struct cobalt_vars cvars; | |
133 | u16 srchost; /* index into cake_host table */ | |
134 | u16 dsthost; | |
135 | u8 set; | |
136 | }; /* please try to keep this structure <= 64 bytes */ | |
137 | ||
138 | struct cake_host { | |
139 | u32 srchost_tag; | |
140 | u32 dsthost_tag; | |
71263992 GA |
141 | u16 srchost_bulk_flow_count; |
142 | u16 dsthost_bulk_flow_count; | |
046f6fd5 THJ |
143 | }; |
144 | ||
145 | struct cake_heap_entry { | |
146 | u16 t:3, b:10; | |
147 | }; | |
148 | ||
149 | struct cake_tin_data { | |
150 | struct cake_flow flows[CAKE_QUEUES]; | |
151 | u32 backlogs[CAKE_QUEUES]; | |
152 | u32 tags[CAKE_QUEUES]; /* for set association */ | |
153 | u16 overflow_idx[CAKE_QUEUES]; | |
154 | struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */ | |
155 | u16 flow_quantum; | |
156 | ||
157 | struct cobalt_params cparams; | |
158 | u32 drop_overlimit; | |
159 | u16 bulk_flow_count; | |
160 | u16 sparse_flow_count; | |
161 | u16 decaying_flow_count; | |
162 | u16 unresponsive_flow_count; | |
163 | ||
164 | u32 max_skblen; | |
165 | ||
166 | struct list_head new_flows; | |
167 | struct list_head old_flows; | |
168 | struct list_head decaying_flows; | |
169 | ||
170 | /* time_next = time_this + ((len * rate_ns) >> rate_shft) */ | |
171 | ktime_t time_next_packet; | |
172 | u64 tin_rate_ns; | |
173 | u64 tin_rate_bps; | |
174 | u16 tin_rate_shft; | |
175 | ||
cbd22f17 | 176 | u16 tin_quantum; |
046f6fd5 THJ |
177 | s32 tin_deficit; |
178 | u32 tin_backlog; | |
179 | u32 tin_dropped; | |
180 | u32 tin_ecn_mark; | |
181 | ||
182 | u32 packets; | |
183 | u64 bytes; | |
184 | ||
185 | u32 ack_drops; | |
186 | ||
187 | /* moving averages */ | |
188 | u64 avge_delay; | |
189 | u64 peak_delay; | |
190 | u64 base_delay; | |
191 | ||
192 | /* hash function stats */ | |
193 | u32 way_directs; | |
194 | u32 way_hits; | |
195 | u32 way_misses; | |
196 | u32 way_collisions; | |
197 | }; /* number of tins is small, so size of this struct doesn't matter much */ | |
198 | ||
199 | struct cake_sched_data { | |
200 | struct tcf_proto __rcu *filter_list; /* optional external classifier */ | |
201 | struct tcf_block *block; | |
202 | struct cake_tin_data *tins; | |
203 | ||
204 | struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS]; | |
205 | u16 overflow_timeout; | |
206 | ||
207 | u16 tin_cnt; | |
208 | u8 tin_mode; | |
209 | u8 flow_mode; | |
210 | u8 ack_filter; | |
211 | u8 atm_mode; | |
212 | ||
eab2fc82 THJ |
213 | u32 fwmark_mask; |
214 | u16 fwmark_shft; | |
215 | ||
046f6fd5 THJ |
216 | /* time_next = time_this + ((len * rate_ns) >> rate_shft) */ |
217 | u16 rate_shft; | |
218 | ktime_t time_next_packet; | |
219 | ktime_t failsafe_next_packet; | |
220 | u64 rate_ns; | |
221 | u64 rate_bps; | |
222 | u16 rate_flags; | |
223 | s16 rate_overhead; | |
224 | u16 rate_mpu; | |
225 | u64 interval; | |
226 | u64 target; | |
227 | ||
228 | /* resource tracking */ | |
229 | u32 buffer_used; | |
230 | u32 buffer_max_used; | |
231 | u32 buffer_limit; | |
232 | u32 buffer_config_limit; | |
233 | ||
234 | /* indices for dequeue */ | |
235 | u16 cur_tin; | |
236 | u16 cur_flow; | |
237 | ||
238 | struct qdisc_watchdog watchdog; | |
239 | const u8 *tin_index; | |
240 | const u8 *tin_order; | |
241 | ||
242 | /* bandwidth capacity estimate */ | |
243 | ktime_t last_packet_time; | |
244 | ktime_t avg_window_begin; | |
245 | u64 avg_packet_interval; | |
246 | u64 avg_window_bytes; | |
247 | u64 avg_peak_bandwidth; | |
248 | ktime_t last_reconfig_time; | |
249 | ||
250 | /* packet length stats */ | |
251 | u32 avg_netoff; | |
252 | u16 max_netlen; | |
253 | u16 max_adjlen; | |
254 | u16 min_netlen; | |
255 | u16 min_adjlen; | |
256 | }; | |
257 | ||
258 | enum { | |
259 | CAKE_FLAG_OVERHEAD = BIT(0), | |
260 | CAKE_FLAG_AUTORATE_INGRESS = BIT(1), | |
261 | CAKE_FLAG_INGRESS = BIT(2), | |
262 | CAKE_FLAG_WASH = BIT(3), | |
eab2fc82 | 263 | CAKE_FLAG_SPLIT_GSO = BIT(4) |
046f6fd5 THJ |
264 | }; |
265 | ||
266 | /* COBALT operates the Codel and BLUE algorithms in parallel, in order to | |
267 | * obtain the best features of each. Codel is excellent on flows which | |
268 | * respond to congestion signals in a TCP-like way. BLUE is more effective on | |
269 | * unresponsive flows. | |
270 | */ | |
271 | ||
272 | struct cobalt_skb_cb { | |
273 | ktime_t enqueue_time; | |
a729b7f0 | 274 | u32 adjusted_len; |
046f6fd5 THJ |
275 | }; |
276 | ||
277 | static u64 us_to_ns(u64 us) | |
278 | { | |
279 | return us * NSEC_PER_USEC; | |
280 | } | |
281 | ||
282 | static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb) | |
283 | { | |
284 | qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb)); | |
285 | return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data; | |
286 | } | |
287 | ||
288 | static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb) | |
289 | { | |
290 | return get_cobalt_cb(skb)->enqueue_time; | |
291 | } | |
292 | ||
293 | static void cobalt_set_enqueue_time(struct sk_buff *skb, | |
294 | ktime_t now) | |
295 | { | |
296 | get_cobalt_cb(skb)->enqueue_time = now; | |
297 | } | |
298 | ||
299 | static u16 quantum_div[CAKE_QUEUES + 1] = {0}; | |
300 | ||
83f8fd69 THJ |
301 | /* Diffserv lookup tables */ |
302 | ||
303 | static const u8 precedence[] = { | |
304 | 0, 0, 0, 0, 0, 0, 0, 0, | |
305 | 1, 1, 1, 1, 1, 1, 1, 1, | |
306 | 2, 2, 2, 2, 2, 2, 2, 2, | |
307 | 3, 3, 3, 3, 3, 3, 3, 3, | |
308 | 4, 4, 4, 4, 4, 4, 4, 4, | |
309 | 5, 5, 5, 5, 5, 5, 5, 5, | |
310 | 6, 6, 6, 6, 6, 6, 6, 6, | |
311 | 7, 7, 7, 7, 7, 7, 7, 7, | |
312 | }; | |
313 | ||
314 | static const u8 diffserv8[] = { | |
315 | 2, 5, 1, 2, 4, 2, 2, 2, | |
316 | 0, 2, 1, 2, 1, 2, 1, 2, | |
317 | 5, 2, 4, 2, 4, 2, 4, 2, | |
318 | 3, 2, 3, 2, 3, 2, 3, 2, | |
319 | 6, 2, 3, 2, 3, 2, 3, 2, | |
320 | 6, 2, 2, 2, 6, 2, 6, 2, | |
321 | 7, 2, 2, 2, 2, 2, 2, 2, | |
322 | 7, 2, 2, 2, 2, 2, 2, 2, | |
323 | }; | |
324 | ||
325 | static const u8 diffserv4[] = { | |
326 | 0, 2, 0, 0, 2, 0, 0, 0, | |
327 | 1, 0, 0, 0, 0, 0, 0, 0, | |
328 | 2, 0, 2, 0, 2, 0, 2, 0, | |
329 | 2, 0, 2, 0, 2, 0, 2, 0, | |
330 | 3, 0, 2, 0, 2, 0, 2, 0, | |
331 | 3, 0, 0, 0, 3, 0, 3, 0, | |
332 | 3, 0, 0, 0, 0, 0, 0, 0, | |
333 | 3, 0, 0, 0, 0, 0, 0, 0, | |
334 | }; | |
335 | ||
336 | static const u8 diffserv3[] = { | |
337 | 0, 0, 0, 0, 2, 0, 0, 0, | |
338 | 1, 0, 0, 0, 0, 0, 0, 0, | |
339 | 0, 0, 0, 0, 0, 0, 0, 0, | |
340 | 0, 0, 0, 0, 0, 0, 0, 0, | |
341 | 0, 0, 0, 0, 0, 0, 0, 0, | |
342 | 0, 0, 0, 0, 2, 0, 2, 0, | |
343 | 2, 0, 0, 0, 0, 0, 0, 0, | |
344 | 2, 0, 0, 0, 0, 0, 0, 0, | |
345 | }; | |
346 | ||
347 | static const u8 besteffort[] = { | |
348 | 0, 0, 0, 0, 0, 0, 0, 0, | |
349 | 0, 0, 0, 0, 0, 0, 0, 0, | |
350 | 0, 0, 0, 0, 0, 0, 0, 0, | |
351 | 0, 0, 0, 0, 0, 0, 0, 0, | |
352 | 0, 0, 0, 0, 0, 0, 0, 0, | |
353 | 0, 0, 0, 0, 0, 0, 0, 0, | |
354 | 0, 0, 0, 0, 0, 0, 0, 0, | |
355 | 0, 0, 0, 0, 0, 0, 0, 0, | |
356 | }; | |
357 | ||
358 | /* tin priority order for stats dumping */ | |
359 | ||
360 | static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7}; | |
361 | static const u8 bulk_order[] = {1, 0, 2, 3}; | |
362 | ||
046f6fd5 THJ |
363 | #define REC_INV_SQRT_CACHE (16) |
364 | static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0}; | |
365 | ||
366 | /* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots | |
367 | * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) | |
368 | * | |
369 | * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 | |
370 | */ | |
371 | ||
372 | static void cobalt_newton_step(struct cobalt_vars *vars) | |
373 | { | |
374 | u32 invsqrt, invsqrt2; | |
375 | u64 val; | |
376 | ||
377 | invsqrt = vars->rec_inv_sqrt; | |
378 | invsqrt2 = ((u64)invsqrt * invsqrt) >> 32; | |
379 | val = (3LL << 32) - ((u64)vars->count * invsqrt2); | |
380 | ||
381 | val >>= 2; /* avoid overflow in following multiply */ | |
382 | val = (val * invsqrt) >> (32 - 2 + 1); | |
383 | ||
384 | vars->rec_inv_sqrt = val; | |
385 | } | |
386 | ||
387 | static void cobalt_invsqrt(struct cobalt_vars *vars) | |
388 | { | |
389 | if (vars->count < REC_INV_SQRT_CACHE) | |
390 | vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count]; | |
391 | else | |
392 | cobalt_newton_step(vars); | |
393 | } | |
394 | ||
395 | /* There is a big difference in timing between the accurate values placed in | |
396 | * the cache and the approximations given by a single Newton step for small | |
397 | * count values, particularly when stepping from count 1 to 2 or vice versa. | |
398 | * Above 16, a single Newton step gives sufficient accuracy in either | |
399 | * direction, given the precision stored. | |
400 | * | |
401 | * The magnitude of the error when stepping up to count 2 is such as to give | |
402 | * the value that *should* have been produced at count 4. | |
403 | */ | |
404 | ||
405 | static void cobalt_cache_init(void) | |
406 | { | |
407 | struct cobalt_vars v; | |
408 | ||
409 | memset(&v, 0, sizeof(v)); | |
410 | v.rec_inv_sqrt = ~0U; | |
411 | cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt; | |
412 | ||
413 | for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) { | |
414 | cobalt_newton_step(&v); | |
415 | cobalt_newton_step(&v); | |
416 | cobalt_newton_step(&v); | |
417 | cobalt_newton_step(&v); | |
418 | ||
419 | cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt; | |
420 | } | |
421 | } | |
422 | ||
423 | static void cobalt_vars_init(struct cobalt_vars *vars) | |
424 | { | |
425 | memset(vars, 0, sizeof(*vars)); | |
426 | ||
427 | if (!cobalt_rec_inv_sqrt_cache[0]) { | |
428 | cobalt_cache_init(); | |
429 | cobalt_rec_inv_sqrt_cache[0] = ~0; | |
430 | } | |
431 | } | |
432 | ||
433 | /* CoDel control_law is t + interval/sqrt(count) | |
434 | * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid | |
435 | * both sqrt() and divide operation. | |
436 | */ | |
437 | static ktime_t cobalt_control(ktime_t t, | |
438 | u64 interval, | |
439 | u32 rec_inv_sqrt) | |
440 | { | |
441 | return ktime_add_ns(t, reciprocal_scale(interval, | |
442 | rec_inv_sqrt)); | |
443 | } | |
444 | ||
445 | /* Call this when a packet had to be dropped due to queue overflow. Returns | |
446 | * true if the BLUE state was quiescent before but active after this call. | |
447 | */ | |
448 | static bool cobalt_queue_full(struct cobalt_vars *vars, | |
449 | struct cobalt_params *p, | |
450 | ktime_t now) | |
451 | { | |
452 | bool up = false; | |
453 | ||
454 | if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { | |
455 | up = !vars->p_drop; | |
456 | vars->p_drop += p->p_inc; | |
457 | if (vars->p_drop < p->p_inc) | |
458 | vars->p_drop = ~0; | |
459 | vars->blue_timer = now; | |
460 | } | |
461 | vars->dropping = true; | |
462 | vars->drop_next = now; | |
463 | if (!vars->count) | |
464 | vars->count = 1; | |
465 | ||
466 | return up; | |
467 | } | |
468 | ||
469 | /* Call this when the queue was serviced but turned out to be empty. Returns | |
470 | * true if the BLUE state was active before but quiescent after this call. | |
471 | */ | |
472 | static bool cobalt_queue_empty(struct cobalt_vars *vars, | |
473 | struct cobalt_params *p, | |
474 | ktime_t now) | |
475 | { | |
476 | bool down = false; | |
477 | ||
478 | if (vars->p_drop && | |
479 | ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) { | |
480 | if (vars->p_drop < p->p_dec) | |
481 | vars->p_drop = 0; | |
482 | else | |
483 | vars->p_drop -= p->p_dec; | |
484 | vars->blue_timer = now; | |
485 | down = !vars->p_drop; | |
486 | } | |
487 | vars->dropping = false; | |
488 | ||
489 | if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) { | |
490 | vars->count--; | |
491 | cobalt_invsqrt(vars); | |
492 | vars->drop_next = cobalt_control(vars->drop_next, | |
493 | p->interval, | |
494 | vars->rec_inv_sqrt); | |
495 | } | |
496 | ||
497 | return down; | |
498 | } | |
499 | ||
500 | /* Call this with a freshly dequeued packet for possible congestion marking. | |
501 | * Returns true as an instruction to drop the packet, false for delivery. | |
502 | */ | |
503 | static bool cobalt_should_drop(struct cobalt_vars *vars, | |
504 | struct cobalt_params *p, | |
505 | ktime_t now, | |
7298de9c THJ |
506 | struct sk_buff *skb, |
507 | u32 bulk_flows) | |
046f6fd5 THJ |
508 | { |
509 | bool next_due, over_target, drop = false; | |
510 | ktime_t schedule; | |
511 | u64 sojourn; | |
512 | ||
513 | /* The 'schedule' variable records, in its sign, whether 'now' is before or | |
514 | * after 'drop_next'. This allows 'drop_next' to be updated before the next | |
515 | * scheduling decision is actually branched, without destroying that | |
516 | * information. Similarly, the first 'schedule' value calculated is preserved | |
517 | * in the boolean 'next_due'. | |
518 | * | |
519 | * As for 'drop_next', we take advantage of the fact that 'interval' is both | |
520 | * the delay between first exceeding 'target' and the first signalling event, | |
521 | * *and* the scaling factor for the signalling frequency. It's therefore very | |
522 | * natural to use a single mechanism for both purposes, and eliminates a | |
523 | * significant amount of reference Codel's spaghetti code. To help with this, | |
524 | * both the '0' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close | |
525 | * as possible to 1.0 in fixed-point. | |
526 | */ | |
527 | ||
528 | sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb))); | |
529 | schedule = ktime_sub(now, vars->drop_next); | |
530 | over_target = sojourn > p->target && | |
7298de9c | 531 | sojourn > p->mtu_time * bulk_flows * 2 && |
046f6fd5 THJ |
532 | sojourn > p->mtu_time * 4; |
533 | next_due = vars->count && ktime_to_ns(schedule) >= 0; | |
534 | ||
535 | vars->ecn_marked = false; | |
536 | ||
537 | if (over_target) { | |
538 | if (!vars->dropping) { | |
539 | vars->dropping = true; | |
540 | vars->drop_next = cobalt_control(now, | |
541 | p->interval, | |
542 | vars->rec_inv_sqrt); | |
543 | } | |
544 | if (!vars->count) | |
545 | vars->count = 1; | |
546 | } else if (vars->dropping) { | |
547 | vars->dropping = false; | |
548 | } | |
549 | ||
550 | if (next_due && vars->dropping) { | |
551 | /* Use ECN mark if possible, otherwise drop */ | |
552 | drop = !(vars->ecn_marked = INET_ECN_set_ce(skb)); | |
553 | ||
554 | vars->count++; | |
555 | if (!vars->count) | |
556 | vars->count--; | |
557 | cobalt_invsqrt(vars); | |
558 | vars->drop_next = cobalt_control(vars->drop_next, | |
559 | p->interval, | |
560 | vars->rec_inv_sqrt); | |
561 | schedule = ktime_sub(now, vars->drop_next); | |
562 | } else { | |
563 | while (next_due) { | |
564 | vars->count--; | |
565 | cobalt_invsqrt(vars); | |
566 | vars->drop_next = cobalt_control(vars->drop_next, | |
567 | p->interval, | |
568 | vars->rec_inv_sqrt); | |
569 | schedule = ktime_sub(now, vars->drop_next); | |
570 | next_due = vars->count && ktime_to_ns(schedule) >= 0; | |
571 | } | |
572 | } | |
573 | ||
574 | /* Simple BLUE implementation. Lack of ECN is deliberate. */ | |
575 | if (vars->p_drop) | |
576 | drop |= (prandom_u32() < vars->p_drop); | |
577 | ||
578 | /* Overload the drop_next field as an activity timeout */ | |
579 | if (!vars->count) | |
580 | vars->drop_next = ktime_add_ns(now, p->interval); | |
581 | else if (ktime_to_ns(schedule) > 0 && !drop) | |
582 | vars->drop_next = now; | |
583 | ||
584 | return drop; | |
585 | } | |
586 | ||
ea825115 THJ |
587 | static void cake_update_flowkeys(struct flow_keys *keys, |
588 | const struct sk_buff *skb) | |
589 | { | |
590 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) | |
591 | struct nf_conntrack_tuple tuple = {}; | |
592 | bool rev = !skb->_nfct; | |
593 | ||
594 | if (tc_skb_protocol(skb) != htons(ETH_P_IP)) | |
595 | return; | |
596 | ||
597 | if (!nf_ct_get_tuple_skb(&tuple, skb)) | |
598 | return; | |
599 | ||
600 | keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip; | |
601 | keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip; | |
602 | ||
603 | if (keys->ports.ports) { | |
604 | keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all; | |
605 | keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all; | |
606 | } | |
607 | #endif | |
608 | } | |
609 | ||
046f6fd5 THJ |
610 | /* Cake has several subtle multiple bit settings. In these cases you |
611 | * would be matching triple isolate mode as well. | |
612 | */ | |
613 | ||
614 | static bool cake_dsrc(int flow_mode) | |
615 | { | |
616 | return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC; | |
617 | } | |
618 | ||
619 | static bool cake_ddst(int flow_mode) | |
620 | { | |
621 | return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST; | |
622 | } | |
623 | ||
624 | static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb, | |
93cfb6c1 | 625 | int flow_mode, u16 flow_override, u16 host_override) |
046f6fd5 | 626 | { |
93cfb6c1 | 627 | u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0; |
046f6fd5 THJ |
628 | u16 reduced_hash, srchost_idx, dsthost_idx; |
629 | struct flow_keys keys, host_keys; | |
630 | ||
631 | if (unlikely(flow_mode == CAKE_FLOW_NONE)) | |
632 | return 0; | |
633 | ||
93cfb6c1 THJ |
634 | /* If both overrides are set we can skip packet dissection entirely */ |
635 | if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) && | |
636 | (host_override || !(flow_mode & CAKE_FLOW_HOSTS))) | |
637 | goto skip_hash; | |
638 | ||
046f6fd5 THJ |
639 | skb_flow_dissect_flow_keys(skb, &keys, |
640 | FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); | |
641 | ||
ea825115 THJ |
642 | if (flow_mode & CAKE_FLOW_NAT_FLAG) |
643 | cake_update_flowkeys(&keys, skb); | |
644 | ||
046f6fd5 THJ |
645 | /* flow_hash_from_keys() sorts the addresses by value, so we have |
646 | * to preserve their order in a separate data structure to treat | |
647 | * src and dst host addresses as independently selectable. | |
648 | */ | |
649 | host_keys = keys; | |
650 | host_keys.ports.ports = 0; | |
651 | host_keys.basic.ip_proto = 0; | |
652 | host_keys.keyid.keyid = 0; | |
653 | host_keys.tags.flow_label = 0; | |
654 | ||
655 | switch (host_keys.control.addr_type) { | |
656 | case FLOW_DISSECTOR_KEY_IPV4_ADDRS: | |
657 | host_keys.addrs.v4addrs.src = 0; | |
658 | dsthost_hash = flow_hash_from_keys(&host_keys); | |
659 | host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; | |
660 | host_keys.addrs.v4addrs.dst = 0; | |
661 | srchost_hash = flow_hash_from_keys(&host_keys); | |
662 | break; | |
663 | ||
664 | case FLOW_DISSECTOR_KEY_IPV6_ADDRS: | |
665 | memset(&host_keys.addrs.v6addrs.src, 0, | |
666 | sizeof(host_keys.addrs.v6addrs.src)); | |
667 | dsthost_hash = flow_hash_from_keys(&host_keys); | |
668 | host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; | |
669 | memset(&host_keys.addrs.v6addrs.dst, 0, | |
670 | sizeof(host_keys.addrs.v6addrs.dst)); | |
671 | srchost_hash = flow_hash_from_keys(&host_keys); | |
672 | break; | |
673 | ||
674 | default: | |
675 | dsthost_hash = 0; | |
676 | srchost_hash = 0; | |
677 | } | |
678 | ||
679 | /* This *must* be after the above switch, since as a | |
680 | * side-effect it sorts the src and dst addresses. | |
681 | */ | |
682 | if (flow_mode & CAKE_FLOW_FLOWS) | |
683 | flow_hash = flow_hash_from_keys(&keys); | |
684 | ||
93cfb6c1 THJ |
685 | skip_hash: |
686 | if (flow_override) | |
687 | flow_hash = flow_override - 1; | |
688 | if (host_override) { | |
689 | dsthost_hash = host_override - 1; | |
690 | srchost_hash = host_override - 1; | |
691 | } | |
692 | ||
046f6fd5 THJ |
693 | if (!(flow_mode & CAKE_FLOW_FLOWS)) { |
694 | if (flow_mode & CAKE_FLOW_SRC_IP) | |
695 | flow_hash ^= srchost_hash; | |
696 | ||
697 | if (flow_mode & CAKE_FLOW_DST_IP) | |
698 | flow_hash ^= dsthost_hash; | |
699 | } | |
700 | ||
701 | reduced_hash = flow_hash % CAKE_QUEUES; | |
702 | ||
703 | /* set-associative hashing */ | |
704 | /* fast path if no hash collision (direct lookup succeeds) */ | |
705 | if (likely(q->tags[reduced_hash] == flow_hash && | |
706 | q->flows[reduced_hash].set)) { | |
707 | q->way_directs++; | |
708 | } else { | |
709 | u32 inner_hash = reduced_hash % CAKE_SET_WAYS; | |
710 | u32 outer_hash = reduced_hash - inner_hash; | |
711 | bool allocate_src = false; | |
712 | bool allocate_dst = false; | |
713 | u32 i, k; | |
714 | ||
715 | /* check if any active queue in the set is reserved for | |
716 | * this flow. | |
717 | */ | |
718 | for (i = 0, k = inner_hash; i < CAKE_SET_WAYS; | |
719 | i++, k = (k + 1) % CAKE_SET_WAYS) { | |
720 | if (q->tags[outer_hash + k] == flow_hash) { | |
721 | if (i) | |
722 | q->way_hits++; | |
723 | ||
724 | if (!q->flows[outer_hash + k].set) { | |
725 | /* need to increment host refcnts */ | |
726 | allocate_src = cake_dsrc(flow_mode); | |
727 | allocate_dst = cake_ddst(flow_mode); | |
728 | } | |
729 | ||
730 | goto found; | |
731 | } | |
732 | } | |
733 | ||
734 | /* no queue is reserved for this flow, look for an | |
735 | * empty one. | |
736 | */ | |
737 | for (i = 0; i < CAKE_SET_WAYS; | |
738 | i++, k = (k + 1) % CAKE_SET_WAYS) { | |
739 | if (!q->flows[outer_hash + k].set) { | |
740 | q->way_misses++; | |
741 | allocate_src = cake_dsrc(flow_mode); | |
742 | allocate_dst = cake_ddst(flow_mode); | |
743 | goto found; | |
744 | } | |
745 | } | |
746 | ||
747 | /* With no empty queues, default to the original | |
748 | * queue, accept the collision, update the host tags. | |
749 | */ | |
750 | q->way_collisions++; | |
71263992 GA |
751 | if (q->flows[outer_hash + k].set == CAKE_SET_BULK) { |
752 | q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--; | |
753 | q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--; | |
754 | } | |
046f6fd5 THJ |
755 | allocate_src = cake_dsrc(flow_mode); |
756 | allocate_dst = cake_ddst(flow_mode); | |
757 | found: | |
758 | /* reserve queue for future packets in same flow */ | |
759 | reduced_hash = outer_hash + k; | |
760 | q->tags[reduced_hash] = flow_hash; | |
761 | ||
762 | if (allocate_src) { | |
763 | srchost_idx = srchost_hash % CAKE_QUEUES; | |
764 | inner_hash = srchost_idx % CAKE_SET_WAYS; | |
765 | outer_hash = srchost_idx - inner_hash; | |
766 | for (i = 0, k = inner_hash; i < CAKE_SET_WAYS; | |
767 | i++, k = (k + 1) % CAKE_SET_WAYS) { | |
768 | if (q->hosts[outer_hash + k].srchost_tag == | |
769 | srchost_hash) | |
770 | goto found_src; | |
771 | } | |
772 | for (i = 0; i < CAKE_SET_WAYS; | |
773 | i++, k = (k + 1) % CAKE_SET_WAYS) { | |
71263992 | 774 | if (!q->hosts[outer_hash + k].srchost_bulk_flow_count) |
046f6fd5 THJ |
775 | break; |
776 | } | |
777 | q->hosts[outer_hash + k].srchost_tag = srchost_hash; | |
778 | found_src: | |
779 | srchost_idx = outer_hash + k; | |
71263992 GA |
780 | if (q->flows[reduced_hash].set == CAKE_SET_BULK) |
781 | q->hosts[srchost_idx].srchost_bulk_flow_count++; | |
046f6fd5 THJ |
782 | q->flows[reduced_hash].srchost = srchost_idx; |
783 | } | |
784 | ||
785 | if (allocate_dst) { | |
786 | dsthost_idx = dsthost_hash % CAKE_QUEUES; | |
787 | inner_hash = dsthost_idx % CAKE_SET_WAYS; | |
788 | outer_hash = dsthost_idx - inner_hash; | |
789 | for (i = 0, k = inner_hash; i < CAKE_SET_WAYS; | |
790 | i++, k = (k + 1) % CAKE_SET_WAYS) { | |
791 | if (q->hosts[outer_hash + k].dsthost_tag == | |
792 | dsthost_hash) | |
793 | goto found_dst; | |
794 | } | |
795 | for (i = 0; i < CAKE_SET_WAYS; | |
796 | i++, k = (k + 1) % CAKE_SET_WAYS) { | |
71263992 | 797 | if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count) |
046f6fd5 THJ |
798 | break; |
799 | } | |
800 | q->hosts[outer_hash + k].dsthost_tag = dsthost_hash; | |
801 | found_dst: | |
802 | dsthost_idx = outer_hash + k; | |
71263992 GA |
803 | if (q->flows[reduced_hash].set == CAKE_SET_BULK) |
804 | q->hosts[dsthost_idx].dsthost_bulk_flow_count++; | |
046f6fd5 THJ |
805 | q->flows[reduced_hash].dsthost = dsthost_idx; |
806 | } | |
807 | } | |
808 | ||
809 | return reduced_hash; | |
810 | } | |
811 | ||
812 | /* helper functions : might be changed when/if skb use a standard list_head */ | |
813 | /* remove one skb from head of slot queue */ | |
814 | ||
815 | static struct sk_buff *dequeue_head(struct cake_flow *flow) | |
816 | { | |
817 | struct sk_buff *skb = flow->head; | |
818 | ||
819 | if (skb) { | |
820 | flow->head = skb->next; | |
a8305bff | 821 | skb_mark_not_on_list(skb); |
046f6fd5 THJ |
822 | } |
823 | ||
824 | return skb; | |
825 | } | |
826 | ||
827 | /* add skb to flow queue (tail add) */ | |
828 | ||
829 | static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb) | |
830 | { | |
831 | if (!flow->head) | |
832 | flow->head = skb; | |
833 | else | |
834 | flow->tail->next = skb; | |
835 | flow->tail = skb; | |
836 | skb->next = NULL; | |
837 | } | |
838 | ||
8b713881 THJ |
839 | static struct iphdr *cake_get_iphdr(const struct sk_buff *skb, |
840 | struct ipv6hdr *buf) | |
841 | { | |
842 | unsigned int offset = skb_network_offset(skb); | |
843 | struct iphdr *iph; | |
844 | ||
845 | iph = skb_header_pointer(skb, offset, sizeof(struct iphdr), buf); | |
846 | ||
847 | if (!iph) | |
848 | return NULL; | |
849 | ||
850 | if (iph->version == 4 && iph->protocol == IPPROTO_IPV6) | |
851 | return skb_header_pointer(skb, offset + iph->ihl * 4, | |
852 | sizeof(struct ipv6hdr), buf); | |
853 | ||
854 | else if (iph->version == 4) | |
855 | return iph; | |
856 | ||
857 | else if (iph->version == 6) | |
858 | return skb_header_pointer(skb, offset, sizeof(struct ipv6hdr), | |
859 | buf); | |
860 | ||
861 | return NULL; | |
862 | } | |
863 | ||
864 | static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb, | |
865 | void *buf, unsigned int bufsize) | |
866 | { | |
867 | unsigned int offset = skb_network_offset(skb); | |
868 | const struct ipv6hdr *ipv6h; | |
869 | const struct tcphdr *tcph; | |
870 | const struct iphdr *iph; | |
871 | struct ipv6hdr _ipv6h; | |
872 | struct tcphdr _tcph; | |
873 | ||
874 | ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h); | |
875 | ||
876 | if (!ipv6h) | |
877 | return NULL; | |
878 | ||
879 | if (ipv6h->version == 4) { | |
880 | iph = (struct iphdr *)ipv6h; | |
881 | offset += iph->ihl * 4; | |
882 | ||
883 | /* special-case 6in4 tunnelling, as that is a common way to get | |
884 | * v6 connectivity in the home | |
885 | */ | |
886 | if (iph->protocol == IPPROTO_IPV6) { | |
887 | ipv6h = skb_header_pointer(skb, offset, | |
888 | sizeof(_ipv6h), &_ipv6h); | |
889 | ||
890 | if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP) | |
891 | return NULL; | |
892 | ||
893 | offset += sizeof(struct ipv6hdr); | |
894 | ||
895 | } else if (iph->protocol != IPPROTO_TCP) { | |
896 | return NULL; | |
897 | } | |
898 | ||
899 | } else if (ipv6h->version == 6) { | |
900 | if (ipv6h->nexthdr != IPPROTO_TCP) | |
901 | return NULL; | |
902 | ||
903 | offset += sizeof(struct ipv6hdr); | |
904 | } else { | |
905 | return NULL; | |
906 | } | |
907 | ||
908 | tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph); | |
909 | if (!tcph) | |
910 | return NULL; | |
911 | ||
912 | return skb_header_pointer(skb, offset, | |
913 | min(__tcp_hdrlen(tcph), bufsize), buf); | |
914 | } | |
915 | ||
916 | static const void *cake_get_tcpopt(const struct tcphdr *tcph, | |
917 | int code, int *oplen) | |
918 | { | |
919 | /* inspired by tcp_parse_options in tcp_input.c */ | |
920 | int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr); | |
921 | const u8 *ptr = (const u8 *)(tcph + 1); | |
922 | ||
923 | while (length > 0) { | |
924 | int opcode = *ptr++; | |
925 | int opsize; | |
926 | ||
927 | if (opcode == TCPOPT_EOL) | |
928 | break; | |
929 | if (opcode == TCPOPT_NOP) { | |
930 | length--; | |
931 | continue; | |
932 | } | |
933 | opsize = *ptr++; | |
934 | if (opsize < 2 || opsize > length) | |
935 | break; | |
936 | ||
937 | if (opcode == code) { | |
938 | *oplen = opsize; | |
939 | return ptr; | |
940 | } | |
941 | ||
942 | ptr += opsize - 2; | |
943 | length -= opsize; | |
944 | } | |
945 | ||
946 | return NULL; | |
947 | } | |
948 | ||
949 | /* Compare two SACK sequences. A sequence is considered greater if it SACKs more | |
950 | * bytes than the other. In the case where both sequences ACKs bytes that the | |
951 | * other doesn't, A is considered greater. DSACKs in A also makes A be | |
952 | * considered greater. | |
953 | * | |
954 | * @return -1, 0 or 1 as normal compare functions | |
955 | */ | |
956 | static int cake_tcph_sack_compare(const struct tcphdr *tcph_a, | |
957 | const struct tcphdr *tcph_b) | |
958 | { | |
959 | const struct tcp_sack_block_wire *sack_a, *sack_b; | |
960 | u32 ack_seq_a = ntohl(tcph_a->ack_seq); | |
961 | u32 bytes_a = 0, bytes_b = 0; | |
962 | int oplen_a, oplen_b; | |
963 | bool first = true; | |
964 | ||
965 | sack_a = cake_get_tcpopt(tcph_a, TCPOPT_SACK, &oplen_a); | |
966 | sack_b = cake_get_tcpopt(tcph_b, TCPOPT_SACK, &oplen_b); | |
967 | ||
968 | /* pointers point to option contents */ | |
969 | oplen_a -= TCPOLEN_SACK_BASE; | |
970 | oplen_b -= TCPOLEN_SACK_BASE; | |
971 | ||
972 | if (sack_a && oplen_a >= sizeof(*sack_a) && | |
973 | (!sack_b || oplen_b < sizeof(*sack_b))) | |
974 | return -1; | |
975 | else if (sack_b && oplen_b >= sizeof(*sack_b) && | |
976 | (!sack_a || oplen_a < sizeof(*sack_a))) | |
977 | return 1; | |
978 | else if ((!sack_a || oplen_a < sizeof(*sack_a)) && | |
979 | (!sack_b || oplen_b < sizeof(*sack_b))) | |
980 | return 0; | |
981 | ||
982 | while (oplen_a >= sizeof(*sack_a)) { | |
983 | const struct tcp_sack_block_wire *sack_tmp = sack_b; | |
984 | u32 start_a = get_unaligned_be32(&sack_a->start_seq); | |
985 | u32 end_a = get_unaligned_be32(&sack_a->end_seq); | |
986 | int oplen_tmp = oplen_b; | |
987 | bool found = false; | |
988 | ||
989 | /* DSACK; always considered greater to prevent dropping */ | |
990 | if (before(start_a, ack_seq_a)) | |
991 | return -1; | |
992 | ||
993 | bytes_a += end_a - start_a; | |
994 | ||
995 | while (oplen_tmp >= sizeof(*sack_tmp)) { | |
996 | u32 start_b = get_unaligned_be32(&sack_tmp->start_seq); | |
997 | u32 end_b = get_unaligned_be32(&sack_tmp->end_seq); | |
998 | ||
999 | /* first time through we count the total size */ | |
1000 | if (first) | |
1001 | bytes_b += end_b - start_b; | |
1002 | ||
1003 | if (!after(start_b, start_a) && !before(end_b, end_a)) { | |
1004 | found = true; | |
1005 | if (!first) | |
1006 | break; | |
1007 | } | |
1008 | oplen_tmp -= sizeof(*sack_tmp); | |
1009 | sack_tmp++; | |
1010 | } | |
1011 | ||
1012 | if (!found) | |
1013 | return -1; | |
1014 | ||
1015 | oplen_a -= sizeof(*sack_a); | |
1016 | sack_a++; | |
1017 | first = false; | |
1018 | } | |
1019 | ||
1020 | /* If we made it this far, all ranges SACKed by A are covered by B, so | |
1021 | * either the SACKs are equal, or B SACKs more bytes. | |
1022 | */ | |
1023 | return bytes_b > bytes_a ? 1 : 0; | |
1024 | } | |
1025 | ||
1026 | static void cake_tcph_get_tstamp(const struct tcphdr *tcph, | |
1027 | u32 *tsval, u32 *tsecr) | |
1028 | { | |
1029 | const u8 *ptr; | |
1030 | int opsize; | |
1031 | ||
1032 | ptr = cake_get_tcpopt(tcph, TCPOPT_TIMESTAMP, &opsize); | |
1033 | ||
1034 | if (ptr && opsize == TCPOLEN_TIMESTAMP) { | |
1035 | *tsval = get_unaligned_be32(ptr); | |
1036 | *tsecr = get_unaligned_be32(ptr + 4); | |
1037 | } | |
1038 | } | |
1039 | ||
1040 | static bool cake_tcph_may_drop(const struct tcphdr *tcph, | |
1041 | u32 tstamp_new, u32 tsecr_new) | |
1042 | { | |
1043 | /* inspired by tcp_parse_options in tcp_input.c */ | |
1044 | int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr); | |
1045 | const u8 *ptr = (const u8 *)(tcph + 1); | |
1046 | u32 tstamp, tsecr; | |
1047 | ||
1048 | /* 3 reserved flags must be unset to avoid future breakage | |
1049 | * ACK must be set | |
1050 | * ECE/CWR are handled separately | |
1051 | * All other flags URG/PSH/RST/SYN/FIN must be unset | |
1052 | * 0x0FFF0000 = all TCP flags (confirm ACK=1, others zero) | |
1053 | * 0x00C00000 = CWR/ECE (handled separately) | |
1054 | * 0x0F3F0000 = 0x0FFF0000 & ~0x00C00000 | |
1055 | */ | |
1056 | if (((tcp_flag_word(tcph) & | |
1057 | cpu_to_be32(0x0F3F0000)) != TCP_FLAG_ACK)) | |
1058 | return false; | |
1059 | ||
1060 | while (length > 0) { | |
1061 | int opcode = *ptr++; | |
1062 | int opsize; | |
1063 | ||
1064 | if (opcode == TCPOPT_EOL) | |
1065 | break; | |
1066 | if (opcode == TCPOPT_NOP) { | |
1067 | length--; | |
1068 | continue; | |
1069 | } | |
1070 | opsize = *ptr++; | |
1071 | if (opsize < 2 || opsize > length) | |
1072 | break; | |
1073 | ||
1074 | switch (opcode) { | |
1075 | case TCPOPT_MD5SIG: /* doesn't influence state */ | |
1076 | break; | |
1077 | ||
1078 | case TCPOPT_SACK: /* stricter checking performed later */ | |
1079 | if (opsize % 8 != 2) | |
1080 | return false; | |
1081 | break; | |
1082 | ||
1083 | case TCPOPT_TIMESTAMP: | |
1084 | /* only drop timestamps lower than new */ | |
1085 | if (opsize != TCPOLEN_TIMESTAMP) | |
1086 | return false; | |
1087 | tstamp = get_unaligned_be32(ptr); | |
1088 | tsecr = get_unaligned_be32(ptr + 4); | |
1089 | if (after(tstamp, tstamp_new) || | |
1090 | after(tsecr, tsecr_new)) | |
1091 | return false; | |
1092 | break; | |
1093 | ||
1094 | case TCPOPT_MSS: /* these should only be set on SYN */ | |
1095 | case TCPOPT_WINDOW: | |
1096 | case TCPOPT_SACK_PERM: | |
1097 | case TCPOPT_FASTOPEN: | |
1098 | case TCPOPT_EXP: | |
1099 | default: /* don't drop if any unknown options are present */ | |
1100 | return false; | |
1101 | } | |
1102 | ||
1103 | ptr += opsize - 2; | |
1104 | length -= opsize; | |
1105 | } | |
1106 | ||
1107 | return true; | |
1108 | } | |
1109 | ||
1110 | static struct sk_buff *cake_ack_filter(struct cake_sched_data *q, | |
1111 | struct cake_flow *flow) | |
1112 | { | |
1113 | bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE; | |
1114 | struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL; | |
1115 | struct sk_buff *skb_check, *skb_prev = NULL; | |
1116 | const struct ipv6hdr *ipv6h, *ipv6h_check; | |
1117 | unsigned char _tcph[64], _tcph_check[64]; | |
1118 | const struct tcphdr *tcph, *tcph_check; | |
1119 | const struct iphdr *iph, *iph_check; | |
1120 | struct ipv6hdr _iph, _iph_check; | |
1121 | const struct sk_buff *skb; | |
1122 | int seglen, num_found = 0; | |
1123 | u32 tstamp = 0, tsecr = 0; | |
1124 | __be32 elig_flags = 0; | |
1125 | int sack_comp; | |
1126 | ||
1127 | /* no other possible ACKs to filter */ | |
1128 | if (flow->head == flow->tail) | |
1129 | return NULL; | |
1130 | ||
1131 | skb = flow->tail; | |
1132 | tcph = cake_get_tcphdr(skb, _tcph, sizeof(_tcph)); | |
1133 | iph = cake_get_iphdr(skb, &_iph); | |
1134 | if (!tcph) | |
1135 | return NULL; | |
1136 | ||
1137 | cake_tcph_get_tstamp(tcph, &tstamp, &tsecr); | |
1138 | ||
1139 | /* the 'triggering' packet need only have the ACK flag set. | |
1140 | * also check that SYN is not set, as there won't be any previous ACKs. | |
1141 | */ | |
1142 | if ((tcp_flag_word(tcph) & | |
1143 | (TCP_FLAG_ACK | TCP_FLAG_SYN)) != TCP_FLAG_ACK) | |
1144 | return NULL; | |
1145 | ||
1146 | /* the 'triggering' ACK is at the tail of the queue, we have already | |
1147 | * returned if it is the only packet in the flow. loop through the rest | |
1148 | * of the queue looking for pure ACKs with the same 5-tuple as the | |
1149 | * triggering one. | |
1150 | */ | |
1151 | for (skb_check = flow->head; | |
1152 | skb_check && skb_check != skb; | |
1153 | skb_prev = skb_check, skb_check = skb_check->next) { | |
1154 | iph_check = cake_get_iphdr(skb_check, &_iph_check); | |
1155 | tcph_check = cake_get_tcphdr(skb_check, &_tcph_check, | |
1156 | sizeof(_tcph_check)); | |
1157 | ||
1158 | /* only TCP packets with matching 5-tuple are eligible, and only | |
1159 | * drop safe headers | |
1160 | */ | |
1161 | if (!tcph_check || iph->version != iph_check->version || | |
1162 | tcph_check->source != tcph->source || | |
1163 | tcph_check->dest != tcph->dest) | |
1164 | continue; | |
1165 | ||
1166 | if (iph_check->version == 4) { | |
1167 | if (iph_check->saddr != iph->saddr || | |
1168 | iph_check->daddr != iph->daddr) | |
1169 | continue; | |
1170 | ||
1171 | seglen = ntohs(iph_check->tot_len) - | |
1172 | (4 * iph_check->ihl); | |
1173 | } else if (iph_check->version == 6) { | |
1174 | ipv6h = (struct ipv6hdr *)iph; | |
1175 | ipv6h_check = (struct ipv6hdr *)iph_check; | |
1176 | ||
1177 | if (ipv6_addr_cmp(&ipv6h_check->saddr, &ipv6h->saddr) || | |
1178 | ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr)) | |
1179 | continue; | |
1180 | ||
1181 | seglen = ntohs(ipv6h_check->payload_len); | |
1182 | } else { | |
1183 | WARN_ON(1); /* shouldn't happen */ | |
1184 | continue; | |
1185 | } | |
1186 | ||
1187 | /* If the ECE/CWR flags changed from the previous eligible | |
1188 | * packet in the same flow, we should no longer be dropping that | |
1189 | * previous packet as this would lose information. | |
1190 | */ | |
1191 | if (elig_ack && (tcp_flag_word(tcph_check) & | |
1192 | (TCP_FLAG_ECE | TCP_FLAG_CWR)) != elig_flags) { | |
1193 | elig_ack = NULL; | |
1194 | elig_ack_prev = NULL; | |
1195 | num_found--; | |
1196 | } | |
1197 | ||
1198 | /* Check TCP options and flags, don't drop ACKs with segment | |
1199 | * data, and don't drop ACKs with a higher cumulative ACK | |
1200 | * counter than the triggering packet. Check ACK seqno here to | |
1201 | * avoid parsing SACK options of packets we are going to exclude | |
1202 | * anyway. | |
1203 | */ | |
1204 | if (!cake_tcph_may_drop(tcph_check, tstamp, tsecr) || | |
1205 | (seglen - __tcp_hdrlen(tcph_check)) != 0 || | |
1206 | after(ntohl(tcph_check->ack_seq), ntohl(tcph->ack_seq))) | |
1207 | continue; | |
1208 | ||
1209 | /* Check SACK options. The triggering packet must SACK more data | |
1210 | * than the ACK under consideration, or SACK the same range but | |
1211 | * have a larger cumulative ACK counter. The latter is a | |
1212 | * pathological case, but is contained in the following check | |
1213 | * anyway, just to be safe. | |
1214 | */ | |
1215 | sack_comp = cake_tcph_sack_compare(tcph_check, tcph); | |
1216 | ||
1217 | if (sack_comp < 0 || | |
1218 | (ntohl(tcph_check->ack_seq) == ntohl(tcph->ack_seq) && | |
1219 | sack_comp == 0)) | |
1220 | continue; | |
1221 | ||
1222 | /* At this point we have found an eligible pure ACK to drop; if | |
1223 | * we are in aggressive mode, we are done. Otherwise, keep | |
1224 | * searching unless this is the second eligible ACK we | |
1225 | * found. | |
1226 | * | |
1227 | * Since we want to drop ACK closest to the head of the queue, | |
1228 | * save the first eligible ACK we find, even if we need to loop | |
1229 | * again. | |
1230 | */ | |
1231 | if (!elig_ack) { | |
1232 | elig_ack = skb_check; | |
1233 | elig_ack_prev = skb_prev; | |
1234 | elig_flags = (tcp_flag_word(tcph_check) | |
1235 | & (TCP_FLAG_ECE | TCP_FLAG_CWR)); | |
1236 | } | |
1237 | ||
1238 | if (num_found++ > 0) | |
1239 | goto found; | |
1240 | } | |
1241 | ||
1242 | /* We made it through the queue without finding two eligible ACKs . If | |
1243 | * we found a single eligible ACK we can drop it in aggressive mode if | |
1244 | * we can guarantee that this does not interfere with ECN flag | |
1245 | * information. We ensure this by dropping it only if the enqueued | |
1246 | * packet is consecutive with the eligible ACK, and their flags match. | |
1247 | */ | |
1248 | if (elig_ack && aggressive && elig_ack->next == skb && | |
1249 | (elig_flags == (tcp_flag_word(tcph) & | |
1250 | (TCP_FLAG_ECE | TCP_FLAG_CWR)))) | |
1251 | goto found; | |
1252 | ||
1253 | return NULL; | |
1254 | ||
1255 | found: | |
1256 | if (elig_ack_prev) | |
1257 | elig_ack_prev->next = elig_ack->next; | |
1258 | else | |
1259 | flow->head = elig_ack->next; | |
1260 | ||
a8305bff | 1261 | skb_mark_not_on_list(elig_ack); |
8b713881 THJ |
1262 | |
1263 | return elig_ack; | |
1264 | } | |
1265 | ||
046f6fd5 THJ |
1266 | static u64 cake_ewma(u64 avg, u64 sample, u32 shift) |
1267 | { | |
1268 | avg -= avg >> shift; | |
1269 | avg += sample >> shift; | |
1270 | return avg; | |
1271 | } | |
1272 | ||
a729b7f0 THJ |
1273 | static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off) |
1274 | { | |
1275 | if (q->rate_flags & CAKE_FLAG_OVERHEAD) | |
1276 | len -= off; | |
1277 | ||
1278 | if (q->max_netlen < len) | |
1279 | q->max_netlen = len; | |
1280 | if (q->min_netlen > len) | |
1281 | q->min_netlen = len; | |
1282 | ||
1283 | len += q->rate_overhead; | |
1284 | ||
1285 | if (len < q->rate_mpu) | |
1286 | len = q->rate_mpu; | |
1287 | ||
1288 | if (q->atm_mode == CAKE_ATM_ATM) { | |
1289 | len += 47; | |
1290 | len /= 48; | |
1291 | len *= 53; | |
1292 | } else if (q->atm_mode == CAKE_ATM_PTM) { | |
1293 | /* Add one byte per 64 bytes or part thereof. | |
1294 | * This is conservative and easier to calculate than the | |
1295 | * precise value. | |
1296 | */ | |
1297 | len += (len + 63) / 64; | |
1298 | } | |
1299 | ||
1300 | if (q->max_adjlen < len) | |
1301 | q->max_adjlen = len; | |
1302 | if (q->min_adjlen > len) | |
1303 | q->min_adjlen = len; | |
1304 | ||
1305 | return len; | |
1306 | } | |
1307 | ||
1308 | static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb) | |
1309 | { | |
1310 | const struct skb_shared_info *shinfo = skb_shinfo(skb); | |
1311 | unsigned int hdr_len, last_len = 0; | |
1312 | u32 off = skb_network_offset(skb); | |
1313 | u32 len = qdisc_pkt_len(skb); | |
1314 | u16 segs = 1; | |
1315 | ||
1316 | q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8); | |
1317 | ||
1318 | if (!shinfo->gso_size) | |
1319 | return cake_calc_overhead(q, len, off); | |
1320 | ||
1321 | /* borrowed from qdisc_pkt_len_init() */ | |
1322 | hdr_len = skb_transport_header(skb) - skb_mac_header(skb); | |
1323 | ||
1324 | /* + transport layer */ | |
1325 | if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | | |
1326 | SKB_GSO_TCPV6))) { | |
1327 | const struct tcphdr *th; | |
1328 | struct tcphdr _tcphdr; | |
1329 | ||
1330 | th = skb_header_pointer(skb, skb_transport_offset(skb), | |
1331 | sizeof(_tcphdr), &_tcphdr); | |
1332 | if (likely(th)) | |
1333 | hdr_len += __tcp_hdrlen(th); | |
1334 | } else { | |
1335 | struct udphdr _udphdr; | |
1336 | ||
1337 | if (skb_header_pointer(skb, skb_transport_offset(skb), | |
1338 | sizeof(_udphdr), &_udphdr)) | |
1339 | hdr_len += sizeof(struct udphdr); | |
1340 | } | |
1341 | ||
1342 | if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) | |
1343 | segs = DIV_ROUND_UP(skb->len - hdr_len, | |
1344 | shinfo->gso_size); | |
1345 | else | |
1346 | segs = shinfo->gso_segs; | |
1347 | ||
1348 | len = shinfo->gso_size + hdr_len; | |
1349 | last_len = skb->len - shinfo->gso_size * (segs - 1); | |
1350 | ||
1351 | return (cake_calc_overhead(q, len, off) * (segs - 1) + | |
1352 | cake_calc_overhead(q, last_len, off)); | |
1353 | } | |
1354 | ||
046f6fd5 THJ |
1355 | static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j) |
1356 | { | |
1357 | struct cake_heap_entry ii = q->overflow_heap[i]; | |
1358 | struct cake_heap_entry jj = q->overflow_heap[j]; | |
1359 | ||
1360 | q->overflow_heap[i] = jj; | |
1361 | q->overflow_heap[j] = ii; | |
1362 | ||
1363 | q->tins[ii.t].overflow_idx[ii.b] = j; | |
1364 | q->tins[jj.t].overflow_idx[jj.b] = i; | |
1365 | } | |
1366 | ||
1367 | static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i) | |
1368 | { | |
1369 | struct cake_heap_entry ii = q->overflow_heap[i]; | |
1370 | ||
1371 | return q->tins[ii.t].backlogs[ii.b]; | |
1372 | } | |
1373 | ||
1374 | static void cake_heapify(struct cake_sched_data *q, u16 i) | |
1375 | { | |
1376 | static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES; | |
1377 | u32 mb = cake_heap_get_backlog(q, i); | |
1378 | u32 m = i; | |
1379 | ||
1380 | while (m < a) { | |
1381 | u32 l = m + m + 1; | |
1382 | u32 r = l + 1; | |
1383 | ||
1384 | if (l < a) { | |
1385 | u32 lb = cake_heap_get_backlog(q, l); | |
1386 | ||
1387 | if (lb > mb) { | |
1388 | m = l; | |
1389 | mb = lb; | |
1390 | } | |
1391 | } | |
1392 | ||
1393 | if (r < a) { | |
1394 | u32 rb = cake_heap_get_backlog(q, r); | |
1395 | ||
1396 | if (rb > mb) { | |
1397 | m = r; | |
1398 | mb = rb; | |
1399 | } | |
1400 | } | |
1401 | ||
1402 | if (m != i) { | |
1403 | cake_heap_swap(q, i, m); | |
1404 | i = m; | |
1405 | } else { | |
1406 | break; | |
1407 | } | |
1408 | } | |
1409 | } | |
1410 | ||
1411 | static void cake_heapify_up(struct cake_sched_data *q, u16 i) | |
1412 | { | |
1413 | while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) { | |
1414 | u16 p = (i - 1) >> 1; | |
1415 | u32 ib = cake_heap_get_backlog(q, i); | |
1416 | u32 pb = cake_heap_get_backlog(q, p); | |
1417 | ||
1418 | if (ib > pb) { | |
1419 | cake_heap_swap(q, i, p); | |
1420 | i = p; | |
1421 | } else { | |
1422 | break; | |
1423 | } | |
1424 | } | |
1425 | } | |
1426 | ||
1427 | static int cake_advance_shaper(struct cake_sched_data *q, | |
1428 | struct cake_tin_data *b, | |
1429 | struct sk_buff *skb, | |
1430 | ktime_t now, bool drop) | |
1431 | { | |
a729b7f0 | 1432 | u32 len = get_cobalt_cb(skb)->adjusted_len; |
046f6fd5 THJ |
1433 | |
1434 | /* charge packet bandwidth to this tin | |
1435 | * and to the global shaper. | |
1436 | */ | |
1437 | if (q->rate_ns) { | |
1438 | u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft; | |
1439 | u64 global_dur = (len * q->rate_ns) >> q->rate_shft; | |
1440 | u64 failsafe_dur = global_dur + (global_dur >> 1); | |
1441 | ||
1442 | if (ktime_before(b->time_next_packet, now)) | |
1443 | b->time_next_packet = ktime_add_ns(b->time_next_packet, | |
1444 | tin_dur); | |
1445 | ||
1446 | else if (ktime_before(b->time_next_packet, | |
1447 | ktime_add_ns(now, tin_dur))) | |
1448 | b->time_next_packet = ktime_add_ns(now, tin_dur); | |
1449 | ||
1450 | q->time_next_packet = ktime_add_ns(q->time_next_packet, | |
1451 | global_dur); | |
1452 | if (!drop) | |
1453 | q->failsafe_next_packet = \ | |
1454 | ktime_add_ns(q->failsafe_next_packet, | |
1455 | failsafe_dur); | |
1456 | } | |
1457 | return len; | |
1458 | } | |
1459 | ||
1460 | static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) | |
1461 | { | |
1462 | struct cake_sched_data *q = qdisc_priv(sch); | |
1463 | ktime_t now = ktime_get(); | |
1464 | u32 idx = 0, tin = 0, len; | |
1465 | struct cake_heap_entry qq; | |
1466 | struct cake_tin_data *b; | |
1467 | struct cake_flow *flow; | |
1468 | struct sk_buff *skb; | |
1469 | ||
1470 | if (!q->overflow_timeout) { | |
1471 | int i; | |
1472 | /* Build fresh max-heap */ | |
1473 | for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--) | |
1474 | cake_heapify(q, i); | |
1475 | } | |
1476 | q->overflow_timeout = 65535; | |
1477 | ||
1478 | /* select longest queue for pruning */ | |
1479 | qq = q->overflow_heap[0]; | |
1480 | tin = qq.t; | |
1481 | idx = qq.b; | |
1482 | ||
1483 | b = &q->tins[tin]; | |
1484 | flow = &b->flows[idx]; | |
1485 | skb = dequeue_head(flow); | |
1486 | if (unlikely(!skb)) { | |
1487 | /* heap has gone wrong, rebuild it next time */ | |
1488 | q->overflow_timeout = 0; | |
1489 | return idx + (tin << 16); | |
1490 | } | |
1491 | ||
1492 | if (cobalt_queue_full(&flow->cvars, &b->cparams, now)) | |
1493 | b->unresponsive_flow_count++; | |
1494 | ||
1495 | len = qdisc_pkt_len(skb); | |
1496 | q->buffer_used -= skb->truesize; | |
1497 | b->backlogs[idx] -= len; | |
1498 | b->tin_backlog -= len; | |
1499 | sch->qstats.backlog -= len; | |
1500 | qdisc_tree_reduce_backlog(sch, 1, len); | |
1501 | ||
1502 | flow->dropped++; | |
1503 | b->tin_dropped++; | |
1504 | sch->qstats.drops++; | |
1505 | ||
7298de9c THJ |
1506 | if (q->rate_flags & CAKE_FLAG_INGRESS) |
1507 | cake_advance_shaper(q, b, skb, now, true); | |
1508 | ||
046f6fd5 THJ |
1509 | __qdisc_drop(skb, to_free); |
1510 | sch->q.qlen--; | |
1511 | ||
1512 | cake_heapify(q, 0); | |
1513 | ||
1514 | return idx + (tin << 16); | |
1515 | } | |
1516 | ||
83f8fd69 THJ |
1517 | static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) |
1518 | { | |
c87b4ecd | 1519 | int wlen = skb_network_offset(skb); |
83f8fd69 THJ |
1520 | u8 dscp; |
1521 | ||
b2100cc5 | 1522 | switch (tc_skb_protocol(skb)) { |
83f8fd69 | 1523 | case htons(ETH_P_IP): |
c87b4ecd THJ |
1524 | wlen += sizeof(struct iphdr); |
1525 | if (!pskb_may_pull(skb, wlen) || | |
1526 | skb_try_make_writable(skb, wlen)) | |
1527 | return 0; | |
1528 | ||
83f8fd69 THJ |
1529 | dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; |
1530 | if (wash && dscp) | |
1531 | ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); | |
1532 | return dscp; | |
1533 | ||
1534 | case htons(ETH_P_IPV6): | |
c87b4ecd THJ |
1535 | wlen += sizeof(struct ipv6hdr); |
1536 | if (!pskb_may_pull(skb, wlen) || | |
1537 | skb_try_make_writable(skb, wlen)) | |
1538 | return 0; | |
1539 | ||
83f8fd69 THJ |
1540 | dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; |
1541 | if (wash && dscp) | |
1542 | ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); | |
1543 | return dscp; | |
1544 | ||
1545 | case htons(ETH_P_ARP): | |
1546 | return 0x38; /* CS7 - Net Control */ | |
1547 | ||
1548 | default: | |
1549 | /* If there is no Diffserv field, treat as best-effort */ | |
1550 | return 0; | |
1551 | } | |
1552 | } | |
1553 | ||
1554 | static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, | |
1555 | struct sk_buff *skb) | |
1556 | { | |
1557 | struct cake_sched_data *q = qdisc_priv(sch); | |
eab2fc82 | 1558 | u32 tin, mark; |
4976e3c6 | 1559 | u8 dscp; |
83f8fd69 | 1560 | |
4976e3c6 THJ |
1561 | /* Tin selection: Default to diffserv-based selection, allow overriding |
1562 | * using firewall marks or skb->priority. | |
1563 | */ | |
1564 | dscp = cake_handle_diffserv(skb, | |
1565 | q->rate_flags & CAKE_FLAG_WASH); | |
eab2fc82 | 1566 | mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft; |
83f8fd69 | 1567 | |
4976e3c6 | 1568 | if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) |
83f8fd69 | 1569 | tin = 0; |
4976e3c6 | 1570 | |
eab2fc82 THJ |
1571 | else if (mark && mark <= q->tin_cnt) |
1572 | tin = q->tin_order[mark - 1]; | |
4976e3c6 THJ |
1573 | |
1574 | else if (TC_H_MAJ(skb->priority) == sch->handle && | |
1575 | TC_H_MIN(skb->priority) > 0 && | |
1576 | TC_H_MIN(skb->priority) <= q->tin_cnt) | |
1577 | tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; | |
1578 | ||
1579 | else { | |
1580 | tin = q->tin_index[dscp]; | |
1581 | ||
1582 | if (unlikely(tin >= q->tin_cnt)) | |
1583 | tin = 0; | |
83f8fd69 THJ |
1584 | } |
1585 | ||
1586 | return &q->tins[tin]; | |
1587 | } | |
1588 | ||
1589 | static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t, | |
046f6fd5 THJ |
1590 | struct sk_buff *skb, int flow_mode, int *qerr) |
1591 | { | |
1592 | struct cake_sched_data *q = qdisc_priv(sch); | |
1593 | struct tcf_proto *filter; | |
1594 | struct tcf_result res; | |
93cfb6c1 | 1595 | u16 flow = 0, host = 0; |
046f6fd5 THJ |
1596 | int result; |
1597 | ||
1598 | filter = rcu_dereference_bh(q->filter_list); | |
1599 | if (!filter) | |
83f8fd69 | 1600 | goto hash; |
046f6fd5 THJ |
1601 | |
1602 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | |
1603 | result = tcf_classify(skb, filter, &res, false); | |
83f8fd69 | 1604 | |
046f6fd5 THJ |
1605 | if (result >= 0) { |
1606 | #ifdef CONFIG_NET_CLS_ACT | |
1607 | switch (result) { | |
1608 | case TC_ACT_STOLEN: | |
1609 | case TC_ACT_QUEUED: | |
1610 | case TC_ACT_TRAP: | |
1611 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; | |
1612 | /* fall through */ | |
1613 | case TC_ACT_SHOT: | |
1614 | return 0; | |
1615 | } | |
1616 | #endif | |
1617 | if (TC_H_MIN(res.classid) <= CAKE_QUEUES) | |
83f8fd69 | 1618 | flow = TC_H_MIN(res.classid); |
93cfb6c1 THJ |
1619 | if (TC_H_MAJ(res.classid) <= (CAKE_QUEUES << 16)) |
1620 | host = TC_H_MAJ(res.classid) >> 16; | |
046f6fd5 | 1621 | } |
83f8fd69 THJ |
1622 | hash: |
1623 | *t = cake_select_tin(sch, skb); | |
93cfb6c1 | 1624 | return cake_hash(*t, skb, flow_mode, flow, host) + 1; |
046f6fd5 THJ |
1625 | } |
1626 | ||
7298de9c THJ |
1627 | static void cake_reconfigure(struct Qdisc *sch); |
1628 | ||
046f6fd5 THJ |
1629 | static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, |
1630 | struct sk_buff **to_free) | |
1631 | { | |
1632 | struct cake_sched_data *q = qdisc_priv(sch); | |
1633 | int len = qdisc_pkt_len(skb); | |
1634 | int uninitialized_var(ret); | |
8b713881 | 1635 | struct sk_buff *ack = NULL; |
046f6fd5 THJ |
1636 | ktime_t now = ktime_get(); |
1637 | struct cake_tin_data *b; | |
1638 | struct cake_flow *flow; | |
83f8fd69 | 1639 | u32 idx; |
046f6fd5 THJ |
1640 | |
1641 | /* choose flow to insert into */ | |
83f8fd69 | 1642 | idx = cake_classify(sch, &b, skb, q->flow_mode, &ret); |
046f6fd5 THJ |
1643 | if (idx == 0) { |
1644 | if (ret & __NET_XMIT_BYPASS) | |
1645 | qdisc_qstats_drop(sch); | |
1646 | __qdisc_drop(skb, to_free); | |
1647 | return ret; | |
1648 | } | |
1649 | idx--; | |
1650 | flow = &b->flows[idx]; | |
1651 | ||
1652 | /* ensure shaper state isn't stale */ | |
1653 | if (!b->tin_backlog) { | |
1654 | if (ktime_before(b->time_next_packet, now)) | |
1655 | b->time_next_packet = now; | |
1656 | ||
1657 | if (!sch->q.qlen) { | |
1658 | if (ktime_before(q->time_next_packet, now)) { | |
1659 | q->failsafe_next_packet = now; | |
1660 | q->time_next_packet = now; | |
1661 | } else if (ktime_after(q->time_next_packet, now) && | |
1662 | ktime_after(q->failsafe_next_packet, now)) { | |
1663 | u64 next = \ | |
1664 | min(ktime_to_ns(q->time_next_packet), | |
1665 | ktime_to_ns( | |
1666 | q->failsafe_next_packet)); | |
1667 | sch->qstats.overlimits++; | |
1668 | qdisc_watchdog_schedule_ns(&q->watchdog, next); | |
1669 | } | |
1670 | } | |
1671 | } | |
1672 | ||
1673 | if (unlikely(len > b->max_skblen)) | |
1674 | b->max_skblen = len; | |
1675 | ||
0c850344 THJ |
1676 | if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) { |
1677 | struct sk_buff *segs, *nskb; | |
1678 | netdev_features_t features = netif_skb_features(skb); | |
8c6c37fd | 1679 | unsigned int slen = 0, numsegs = 0; |
0c850344 THJ |
1680 | |
1681 | segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); | |
1682 | if (IS_ERR_OR_NULL(segs)) | |
1683 | return qdisc_drop(skb, sch, to_free); | |
1684 | ||
b950d8a5 | 1685 | skb_list_walk_safe(segs, segs, nskb) { |
a8305bff | 1686 | skb_mark_not_on_list(segs); |
0c850344 THJ |
1687 | qdisc_skb_cb(segs)->pkt_len = segs->len; |
1688 | cobalt_set_enqueue_time(segs, now); | |
1689 | get_cobalt_cb(segs)->adjusted_len = cake_overhead(q, | |
1690 | segs); | |
1691 | flow_queue_add(flow, segs); | |
1692 | ||
1693 | sch->q.qlen++; | |
8c6c37fd | 1694 | numsegs++; |
0c850344 THJ |
1695 | slen += segs->len; |
1696 | q->buffer_used += segs->truesize; | |
1697 | b->packets++; | |
0c850344 | 1698 | } |
8b713881 | 1699 | |
0c850344 THJ |
1700 | /* stats */ |
1701 | b->bytes += slen; | |
1702 | b->backlogs[idx] += slen; | |
1703 | b->tin_backlog += slen; | |
1704 | sch->qstats.backlog += slen; | |
1705 | q->avg_window_bytes += slen; | |
8b713881 | 1706 | |
8c6c37fd | 1707 | qdisc_tree_reduce_backlog(sch, 1-numsegs, len-slen); |
0c850344 | 1708 | consume_skb(skb); |
8b713881 | 1709 | } else { |
0c850344 THJ |
1710 | /* not splitting */ |
1711 | cobalt_set_enqueue_time(skb, now); | |
1712 | get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb); | |
1713 | flow_queue_add(flow, skb); | |
1714 | ||
1715 | if (q->ack_filter) | |
1716 | ack = cake_ack_filter(q, flow); | |
1717 | ||
1718 | if (ack) { | |
1719 | b->ack_drops++; | |
1720 | sch->qstats.drops++; | |
1721 | b->bytes += qdisc_pkt_len(ack); | |
1722 | len -= qdisc_pkt_len(ack); | |
1723 | q->buffer_used += skb->truesize - ack->truesize; | |
1724 | if (q->rate_flags & CAKE_FLAG_INGRESS) | |
1725 | cake_advance_shaper(q, b, ack, now, true); | |
1726 | ||
1727 | qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack)); | |
1728 | consume_skb(ack); | |
1729 | } else { | |
1730 | sch->q.qlen++; | |
1731 | q->buffer_used += skb->truesize; | |
1732 | } | |
046f6fd5 | 1733 | |
0c850344 THJ |
1734 | /* stats */ |
1735 | b->packets++; | |
1736 | b->bytes += len; | |
1737 | b->backlogs[idx] += len; | |
1738 | b->tin_backlog += len; | |
1739 | sch->qstats.backlog += len; | |
1740 | q->avg_window_bytes += len; | |
1741 | } | |
046f6fd5 THJ |
1742 | |
1743 | if (q->overflow_timeout) | |
1744 | cake_heapify_up(q, b->overflow_idx[idx]); | |
1745 | ||
1746 | /* incoming bandwidth capacity estimate */ | |
7298de9c THJ |
1747 | if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) { |
1748 | u64 packet_interval = \ | |
1749 | ktime_to_ns(ktime_sub(now, q->last_packet_time)); | |
1750 | ||
1751 | if (packet_interval > NSEC_PER_SEC) | |
1752 | packet_interval = NSEC_PER_SEC; | |
1753 | ||
1754 | /* filter out short-term bursts, eg. wifi aggregation */ | |
1755 | q->avg_packet_interval = \ | |
1756 | cake_ewma(q->avg_packet_interval, | |
1757 | packet_interval, | |
1758 | (packet_interval > q->avg_packet_interval ? | |
1759 | 2 : 8)); | |
1760 | ||
1761 | q->last_packet_time = now; | |
1762 | ||
1763 | if (packet_interval > q->avg_packet_interval) { | |
1764 | u64 window_interval = \ | |
1765 | ktime_to_ns(ktime_sub(now, | |
1766 | q->avg_window_begin)); | |
1767 | u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC; | |
1768 | ||
68aab823 | 1769 | b = div64_u64(b, window_interval); |
7298de9c THJ |
1770 | q->avg_peak_bandwidth = |
1771 | cake_ewma(q->avg_peak_bandwidth, b, | |
1772 | b > q->avg_peak_bandwidth ? 2 : 8); | |
1773 | q->avg_window_bytes = 0; | |
1774 | q->avg_window_begin = now; | |
1775 | ||
1776 | if (ktime_after(now, | |
1777 | ktime_add_ms(q->last_reconfig_time, | |
1778 | 250))) { | |
1779 | q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4; | |
1780 | cake_reconfigure(sch); | |
1781 | } | |
1782 | } | |
1783 | } else { | |
1784 | q->avg_window_bytes = 0; | |
1785 | q->last_packet_time = now; | |
1786 | } | |
046f6fd5 THJ |
1787 | |
1788 | /* flowchain */ | |
1789 | if (!flow->set || flow->set == CAKE_SET_DECAYING) { | |
1790 | struct cake_host *srchost = &b->hosts[flow->srchost]; | |
1791 | struct cake_host *dsthost = &b->hosts[flow->dsthost]; | |
1792 | u16 host_load = 1; | |
1793 | ||
1794 | if (!flow->set) { | |
1795 | list_add_tail(&flow->flowchain, &b->new_flows); | |
1796 | } else { | |
1797 | b->decaying_flow_count--; | |
1798 | list_move_tail(&flow->flowchain, &b->new_flows); | |
1799 | } | |
1800 | flow->set = CAKE_SET_SPARSE; | |
1801 | b->sparse_flow_count++; | |
1802 | ||
1803 | if (cake_dsrc(q->flow_mode)) | |
71263992 | 1804 | host_load = max(host_load, srchost->srchost_bulk_flow_count); |
046f6fd5 THJ |
1805 | |
1806 | if (cake_ddst(q->flow_mode)) | |
71263992 | 1807 | host_load = max(host_load, dsthost->dsthost_bulk_flow_count); |
046f6fd5 THJ |
1808 | |
1809 | flow->deficit = (b->flow_quantum * | |
1810 | quantum_div[host_load]) >> 16; | |
1811 | } else if (flow->set == CAKE_SET_SPARSE_WAIT) { | |
71263992 GA |
1812 | struct cake_host *srchost = &b->hosts[flow->srchost]; |
1813 | struct cake_host *dsthost = &b->hosts[flow->dsthost]; | |
1814 | ||
046f6fd5 THJ |
1815 | /* this flow was empty, accounted as a sparse flow, but actually |
1816 | * in the bulk rotation. | |
1817 | */ | |
1818 | flow->set = CAKE_SET_BULK; | |
1819 | b->sparse_flow_count--; | |
1820 | b->bulk_flow_count++; | |
71263992 GA |
1821 | |
1822 | if (cake_dsrc(q->flow_mode)) | |
1823 | srchost->srchost_bulk_flow_count++; | |
1824 | ||
1825 | if (cake_ddst(q->flow_mode)) | |
1826 | dsthost->dsthost_bulk_flow_count++; | |
1827 | ||
046f6fd5 THJ |
1828 | } |
1829 | ||
1830 | if (q->buffer_used > q->buffer_max_used) | |
1831 | q->buffer_max_used = q->buffer_used; | |
1832 | ||
1833 | if (q->buffer_used > q->buffer_limit) { | |
1834 | u32 dropped = 0; | |
1835 | ||
1836 | while (q->buffer_used > q->buffer_limit) { | |
1837 | dropped++; | |
1838 | cake_drop(sch, to_free); | |
1839 | } | |
1840 | b->drop_overlimit += dropped; | |
1841 | } | |
1842 | return NET_XMIT_SUCCESS; | |
1843 | } | |
1844 | ||
1845 | static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) | |
1846 | { | |
1847 | struct cake_sched_data *q = qdisc_priv(sch); | |
1848 | struct cake_tin_data *b = &q->tins[q->cur_tin]; | |
1849 | struct cake_flow *flow = &b->flows[q->cur_flow]; | |
1850 | struct sk_buff *skb = NULL; | |
1851 | u32 len; | |
1852 | ||
1853 | if (flow->head) { | |
1854 | skb = dequeue_head(flow); | |
1855 | len = qdisc_pkt_len(skb); | |
1856 | b->backlogs[q->cur_flow] -= len; | |
1857 | b->tin_backlog -= len; | |
1858 | sch->qstats.backlog -= len; | |
1859 | q->buffer_used -= skb->truesize; | |
1860 | sch->q.qlen--; | |
1861 | ||
1862 | if (q->overflow_timeout) | |
1863 | cake_heapify(q, b->overflow_idx[q->cur_flow]); | |
1864 | } | |
1865 | return skb; | |
1866 | } | |
1867 | ||
1868 | /* Discard leftover packets from a tin no longer in use. */ | |
1869 | static void cake_clear_tin(struct Qdisc *sch, u16 tin) | |
1870 | { | |
1871 | struct cake_sched_data *q = qdisc_priv(sch); | |
1872 | struct sk_buff *skb; | |
1873 | ||
1874 | q->cur_tin = tin; | |
1875 | for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++) | |
1876 | while (!!(skb = cake_dequeue_one(sch))) | |
1877 | kfree_skb(skb); | |
1878 | } | |
1879 | ||
1880 | static struct sk_buff *cake_dequeue(struct Qdisc *sch) | |
1881 | { | |
1882 | struct cake_sched_data *q = qdisc_priv(sch); | |
1883 | struct cake_tin_data *b = &q->tins[q->cur_tin]; | |
1884 | struct cake_host *srchost, *dsthost; | |
1885 | ktime_t now = ktime_get(); | |
1886 | struct cake_flow *flow; | |
1887 | struct list_head *head; | |
1888 | bool first_flow = true; | |
1889 | struct sk_buff *skb; | |
1890 | u16 host_load; | |
1891 | u64 delay; | |
1892 | u32 len; | |
1893 | ||
1894 | begin: | |
1895 | if (!sch->q.qlen) | |
1896 | return NULL; | |
1897 | ||
1898 | /* global hard shaper */ | |
1899 | if (ktime_after(q->time_next_packet, now) && | |
1900 | ktime_after(q->failsafe_next_packet, now)) { | |
1901 | u64 next = min(ktime_to_ns(q->time_next_packet), | |
1902 | ktime_to_ns(q->failsafe_next_packet)); | |
1903 | ||
1904 | sch->qstats.overlimits++; | |
1905 | qdisc_watchdog_schedule_ns(&q->watchdog, next); | |
1906 | return NULL; | |
1907 | } | |
1908 | ||
1909 | /* Choose a class to work on. */ | |
1910 | if (!q->rate_ns) { | |
1911 | /* In unlimited mode, can't rely on shaper timings, just balance | |
1912 | * with DRR | |
1913 | */ | |
1914 | bool wrapped = false, empty = true; | |
1915 | ||
1916 | while (b->tin_deficit < 0 || | |
1917 | !(b->sparse_flow_count + b->bulk_flow_count)) { | |
1918 | if (b->tin_deficit <= 0) | |
cbd22f17 | 1919 | b->tin_deficit += b->tin_quantum; |
046f6fd5 THJ |
1920 | if (b->sparse_flow_count + b->bulk_flow_count) |
1921 | empty = false; | |
1922 | ||
1923 | q->cur_tin++; | |
1924 | b++; | |
1925 | if (q->cur_tin >= q->tin_cnt) { | |
1926 | q->cur_tin = 0; | |
1927 | b = q->tins; | |
1928 | ||
1929 | if (wrapped) { | |
1930 | /* It's possible for q->qlen to be | |
1931 | * nonzero when we actually have no | |
1932 | * packets anywhere. | |
1933 | */ | |
1934 | if (empty) | |
1935 | return NULL; | |
1936 | } else { | |
1937 | wrapped = true; | |
1938 | } | |
1939 | } | |
1940 | } | |
1941 | } else { | |
1942 | /* In shaped mode, choose: | |
1943 | * - Highest-priority tin with queue and meeting schedule, or | |
1944 | * - The earliest-scheduled tin with queue. | |
1945 | */ | |
1946 | ktime_t best_time = KTIME_MAX; | |
1947 | int tin, best_tin = 0; | |
1948 | ||
1949 | for (tin = 0; tin < q->tin_cnt; tin++) { | |
1950 | b = q->tins + tin; | |
1951 | if ((b->sparse_flow_count + b->bulk_flow_count) > 0) { | |
1952 | ktime_t time_to_pkt = \ | |
1953 | ktime_sub(b->time_next_packet, now); | |
1954 | ||
1955 | if (ktime_to_ns(time_to_pkt) <= 0 || | |
1956 | ktime_compare(time_to_pkt, | |
1957 | best_time) <= 0) { | |
1958 | best_time = time_to_pkt; | |
1959 | best_tin = tin; | |
1960 | } | |
1961 | } | |
1962 | } | |
1963 | ||
1964 | q->cur_tin = best_tin; | |
1965 | b = q->tins + best_tin; | |
1966 | ||
1967 | /* No point in going further if no packets to deliver. */ | |
1968 | if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count))) | |
1969 | return NULL; | |
1970 | } | |
1971 | ||
1972 | retry: | |
1973 | /* service this class */ | |
1974 | head = &b->decaying_flows; | |
1975 | if (!first_flow || list_empty(head)) { | |
1976 | head = &b->new_flows; | |
1977 | if (list_empty(head)) { | |
1978 | head = &b->old_flows; | |
1979 | if (unlikely(list_empty(head))) { | |
1980 | head = &b->decaying_flows; | |
1981 | if (unlikely(list_empty(head))) | |
1982 | goto begin; | |
1983 | } | |
1984 | } | |
1985 | } | |
1986 | flow = list_first_entry(head, struct cake_flow, flowchain); | |
1987 | q->cur_flow = flow - b->flows; | |
1988 | first_flow = false; | |
1989 | ||
1990 | /* triple isolation (modified DRR++) */ | |
1991 | srchost = &b->hosts[flow->srchost]; | |
1992 | dsthost = &b->hosts[flow->dsthost]; | |
1993 | host_load = 1; | |
1994 | ||
046f6fd5 THJ |
1995 | /* flow isolation (DRR++) */ |
1996 | if (flow->deficit <= 0) { | |
046f6fd5 THJ |
1997 | /* Keep all flows with deficits out of the sparse and decaying |
1998 | * rotations. No non-empty flow can go into the decaying | |
1999 | * rotation, so they can't get deficits | |
2000 | */ | |
2001 | if (flow->set == CAKE_SET_SPARSE) { | |
2002 | if (flow->head) { | |
2003 | b->sparse_flow_count--; | |
2004 | b->bulk_flow_count++; | |
71263992 GA |
2005 | |
2006 | if (cake_dsrc(q->flow_mode)) | |
2007 | srchost->srchost_bulk_flow_count++; | |
2008 | ||
2009 | if (cake_ddst(q->flow_mode)) | |
2010 | dsthost->dsthost_bulk_flow_count++; | |
2011 | ||
046f6fd5 THJ |
2012 | flow->set = CAKE_SET_BULK; |
2013 | } else { | |
2014 | /* we've moved it to the bulk rotation for | |
2015 | * correct deficit accounting but we still want | |
2016 | * to count it as a sparse flow, not a bulk one. | |
2017 | */ | |
2018 | flow->set = CAKE_SET_SPARSE_WAIT; | |
2019 | } | |
2020 | } | |
71263992 GA |
2021 | |
2022 | if (cake_dsrc(q->flow_mode)) | |
2023 | host_load = max(host_load, srchost->srchost_bulk_flow_count); | |
2024 | ||
2025 | if (cake_ddst(q->flow_mode)) | |
2026 | host_load = max(host_load, dsthost->dsthost_bulk_flow_count); | |
2027 | ||
2028 | WARN_ON(host_load > CAKE_QUEUES); | |
2029 | ||
2030 | /* The shifted prandom_u32() is a way to apply dithering to | |
2031 | * avoid accumulating roundoff errors | |
2032 | */ | |
2033 | flow->deficit += (b->flow_quantum * quantum_div[host_load] + | |
2034 | (prandom_u32() >> 16)) >> 16; | |
2035 | list_move_tail(&flow->flowchain, &b->old_flows); | |
2036 | ||
046f6fd5 THJ |
2037 | goto retry; |
2038 | } | |
2039 | ||
2040 | /* Retrieve a packet via the AQM */ | |
2041 | while (1) { | |
2042 | skb = cake_dequeue_one(sch); | |
2043 | if (!skb) { | |
2044 | /* this queue was actually empty */ | |
2045 | if (cobalt_queue_empty(&flow->cvars, &b->cparams, now)) | |
2046 | b->unresponsive_flow_count--; | |
2047 | ||
2048 | if (flow->cvars.p_drop || flow->cvars.count || | |
2049 | ktime_before(now, flow->cvars.drop_next)) { | |
2050 | /* keep in the flowchain until the state has | |
2051 | * decayed to rest | |
2052 | */ | |
2053 | list_move_tail(&flow->flowchain, | |
2054 | &b->decaying_flows); | |
2055 | if (flow->set == CAKE_SET_BULK) { | |
2056 | b->bulk_flow_count--; | |
71263992 GA |
2057 | |
2058 | if (cake_dsrc(q->flow_mode)) | |
2059 | srchost->srchost_bulk_flow_count--; | |
2060 | ||
2061 | if (cake_ddst(q->flow_mode)) | |
2062 | dsthost->dsthost_bulk_flow_count--; | |
2063 | ||
046f6fd5 THJ |
2064 | b->decaying_flow_count++; |
2065 | } else if (flow->set == CAKE_SET_SPARSE || | |
2066 | flow->set == CAKE_SET_SPARSE_WAIT) { | |
2067 | b->sparse_flow_count--; | |
2068 | b->decaying_flow_count++; | |
2069 | } | |
2070 | flow->set = CAKE_SET_DECAYING; | |
2071 | } else { | |
2072 | /* remove empty queue from the flowchain */ | |
2073 | list_del_init(&flow->flowchain); | |
2074 | if (flow->set == CAKE_SET_SPARSE || | |
2075 | flow->set == CAKE_SET_SPARSE_WAIT) | |
2076 | b->sparse_flow_count--; | |
71263992 | 2077 | else if (flow->set == CAKE_SET_BULK) { |
046f6fd5 | 2078 | b->bulk_flow_count--; |
71263992 GA |
2079 | |
2080 | if (cake_dsrc(q->flow_mode)) | |
2081 | srchost->srchost_bulk_flow_count--; | |
2082 | ||
2083 | if (cake_ddst(q->flow_mode)) | |
2084 | dsthost->dsthost_bulk_flow_count--; | |
2085 | ||
2086 | } else | |
046f6fd5 THJ |
2087 | b->decaying_flow_count--; |
2088 | ||
2089 | flow->set = CAKE_SET_NONE; | |
046f6fd5 THJ |
2090 | } |
2091 | goto begin; | |
2092 | } | |
2093 | ||
2094 | /* Last packet in queue may be marked, shouldn't be dropped */ | |
7298de9c THJ |
2095 | if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb, |
2096 | (b->bulk_flow_count * | |
2097 | !!(q->rate_flags & | |
2098 | CAKE_FLAG_INGRESS))) || | |
046f6fd5 THJ |
2099 | !flow->head) |
2100 | break; | |
2101 | ||
7298de9c THJ |
2102 | /* drop this packet, get another one */ |
2103 | if (q->rate_flags & CAKE_FLAG_INGRESS) { | |
2104 | len = cake_advance_shaper(q, b, skb, | |
2105 | now, true); | |
2106 | flow->deficit -= len; | |
2107 | b->tin_deficit -= len; | |
2108 | } | |
046f6fd5 THJ |
2109 | flow->dropped++; |
2110 | b->tin_dropped++; | |
2111 | qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); | |
2112 | qdisc_qstats_drop(sch); | |
2113 | kfree_skb(skb); | |
7298de9c THJ |
2114 | if (q->rate_flags & CAKE_FLAG_INGRESS) |
2115 | goto retry; | |
046f6fd5 THJ |
2116 | } |
2117 | ||
2118 | b->tin_ecn_mark += !!flow->cvars.ecn_marked; | |
2119 | qdisc_bstats_update(sch, skb); | |
2120 | ||
2121 | /* collect delay stats */ | |
2122 | delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb))); | |
2123 | b->avge_delay = cake_ewma(b->avge_delay, delay, 8); | |
2124 | b->peak_delay = cake_ewma(b->peak_delay, delay, | |
2125 | delay > b->peak_delay ? 2 : 8); | |
2126 | b->base_delay = cake_ewma(b->base_delay, delay, | |
2127 | delay < b->base_delay ? 2 : 8); | |
2128 | ||
2129 | len = cake_advance_shaper(q, b, skb, now, false); | |
2130 | flow->deficit -= len; | |
2131 | b->tin_deficit -= len; | |
2132 | ||
2133 | if (ktime_after(q->time_next_packet, now) && sch->q.qlen) { | |
2134 | u64 next = min(ktime_to_ns(q->time_next_packet), | |
2135 | ktime_to_ns(q->failsafe_next_packet)); | |
2136 | ||
2137 | qdisc_watchdog_schedule_ns(&q->watchdog, next); | |
2138 | } else if (!sch->q.qlen) { | |
2139 | int i; | |
2140 | ||
2141 | for (i = 0; i < q->tin_cnt; i++) { | |
2142 | if (q->tins[i].decaying_flow_count) { | |
2143 | ktime_t next = \ | |
2144 | ktime_add_ns(now, | |
2145 | q->tins[i].cparams.target); | |
2146 | ||
2147 | qdisc_watchdog_schedule_ns(&q->watchdog, | |
2148 | ktime_to_ns(next)); | |
2149 | break; | |
2150 | } | |
2151 | } | |
2152 | } | |
2153 | ||
2154 | if (q->overflow_timeout) | |
2155 | q->overflow_timeout--; | |
2156 | ||
2157 | return skb; | |
2158 | } | |
2159 | ||
2160 | static void cake_reset(struct Qdisc *sch) | |
2161 | { | |
2162 | u32 c; | |
2163 | ||
2164 | for (c = 0; c < CAKE_MAX_TINS; c++) | |
2165 | cake_clear_tin(sch, c); | |
2166 | } | |
2167 | ||
2168 | static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = { | |
2169 | [TCA_CAKE_BASE_RATE64] = { .type = NLA_U64 }, | |
2170 | [TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 }, | |
2171 | [TCA_CAKE_ATM] = { .type = NLA_U32 }, | |
2172 | [TCA_CAKE_FLOW_MODE] = { .type = NLA_U32 }, | |
2173 | [TCA_CAKE_OVERHEAD] = { .type = NLA_S32 }, | |
2174 | [TCA_CAKE_RTT] = { .type = NLA_U32 }, | |
2175 | [TCA_CAKE_TARGET] = { .type = NLA_U32 }, | |
2176 | [TCA_CAKE_AUTORATE] = { .type = NLA_U32 }, | |
2177 | [TCA_CAKE_MEMORY] = { .type = NLA_U32 }, | |
2178 | [TCA_CAKE_NAT] = { .type = NLA_U32 }, | |
2179 | [TCA_CAKE_RAW] = { .type = NLA_U32 }, | |
2180 | [TCA_CAKE_WASH] = { .type = NLA_U32 }, | |
2181 | [TCA_CAKE_MPU] = { .type = NLA_U32 }, | |
2182 | [TCA_CAKE_INGRESS] = { .type = NLA_U32 }, | |
2183 | [TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 }, | |
b3c424eb | 2184 | [TCA_CAKE_SPLIT_GSO] = { .type = NLA_U32 }, |
eab2fc82 | 2185 | [TCA_CAKE_FWMARK] = { .type = NLA_U32 }, |
046f6fd5 THJ |
2186 | }; |
2187 | ||
2188 | static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, | |
2189 | u64 target_ns, u64 rtt_est_ns) | |
2190 | { | |
2191 | /* convert byte-rate into time-per-byte | |
2192 | * so it will always unwedge in reasonable time. | |
2193 | */ | |
2194 | static const u64 MIN_RATE = 64; | |
2195 | u32 byte_target = mtu; | |
2196 | u64 byte_target_ns; | |
2197 | u8 rate_shft = 0; | |
2198 | u64 rate_ns = 0; | |
2199 | ||
2200 | b->flow_quantum = 1514; | |
2201 | if (rate) { | |
2202 | b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL); | |
2203 | rate_shft = 34; | |
2204 | rate_ns = ((u64)NSEC_PER_SEC) << rate_shft; | |
2205 | rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate)); | |
2206 | while (!!(rate_ns >> 34)) { | |
2207 | rate_ns >>= 1; | |
2208 | rate_shft--; | |
2209 | } | |
2210 | } /* else unlimited, ie. zero delay */ | |
2211 | ||
2212 | b->tin_rate_bps = rate; | |
2213 | b->tin_rate_ns = rate_ns; | |
2214 | b->tin_rate_shft = rate_shft; | |
2215 | ||
2216 | byte_target_ns = (byte_target * rate_ns) >> rate_shft; | |
2217 | ||
2218 | b->cparams.target = max((byte_target_ns * 3) / 2, target_ns); | |
2219 | b->cparams.interval = max(rtt_est_ns + | |
2220 | b->cparams.target - target_ns, | |
2221 | b->cparams.target * 2); | |
2222 | b->cparams.mtu_time = byte_target_ns; | |
2223 | b->cparams.p_inc = 1 << 24; /* 1/256 */ | |
2224 | b->cparams.p_dec = 1 << 20; /* 1/4096 */ | |
2225 | } | |
2226 | ||
83f8fd69 | 2227 | static int cake_config_besteffort(struct Qdisc *sch) |
046f6fd5 THJ |
2228 | { |
2229 | struct cake_sched_data *q = qdisc_priv(sch); | |
2230 | struct cake_tin_data *b = &q->tins[0]; | |
83f8fd69 THJ |
2231 | u32 mtu = psched_mtu(qdisc_dev(sch)); |
2232 | u64 rate = q->rate_bps; | |
046f6fd5 THJ |
2233 | |
2234 | q->tin_cnt = 1; | |
83f8fd69 THJ |
2235 | |
2236 | q->tin_index = besteffort; | |
2237 | q->tin_order = normal_order; | |
2238 | ||
2239 | cake_set_rate(b, rate, mtu, | |
046f6fd5 | 2240 | us_to_ns(q->target), us_to_ns(q->interval)); |
cbd22f17 | 2241 | b->tin_quantum = 65535; |
046f6fd5 | 2242 | |
83f8fd69 THJ |
2243 | return 0; |
2244 | } | |
2245 | ||
2246 | static int cake_config_precedence(struct Qdisc *sch) | |
2247 | { | |
2248 | /* convert high-level (user visible) parameters into internal format */ | |
2249 | struct cake_sched_data *q = qdisc_priv(sch); | |
2250 | u32 mtu = psched_mtu(qdisc_dev(sch)); | |
2251 | u64 rate = q->rate_bps; | |
cbd22f17 | 2252 | u32 quantum = 256; |
83f8fd69 THJ |
2253 | u32 i; |
2254 | ||
2255 | q->tin_cnt = 8; | |
2256 | q->tin_index = precedence; | |
2257 | q->tin_order = normal_order; | |
2258 | ||
2259 | for (i = 0; i < q->tin_cnt; i++) { | |
2260 | struct cake_tin_data *b = &q->tins[i]; | |
2261 | ||
2262 | cake_set_rate(b, rate, mtu, us_to_ns(q->target), | |
2263 | us_to_ns(q->interval)); | |
2264 | ||
cbd22f17 | 2265 | b->tin_quantum = max_t(u16, 1U, quantum); |
83f8fd69 THJ |
2266 | |
2267 | /* calculate next class's parameters */ | |
2268 | rate *= 7; | |
2269 | rate >>= 3; | |
2270 | ||
cbd22f17 KDB |
2271 | quantum *= 7; |
2272 | quantum >>= 3; | |
83f8fd69 THJ |
2273 | } |
2274 | ||
2275 | return 0; | |
2276 | } | |
2277 | ||
2278 | /* List of known Diffserv codepoints: | |
2279 | * | |
2280 | * Least Effort (CS1) | |
2281 | * Best Effort (CS0) | |
2282 | * Max Reliability & LLT "Lo" (TOS1) | |
2283 | * Max Throughput (TOS2) | |
2284 | * Min Delay (TOS4) | |
2285 | * LLT "La" (TOS5) | |
2286 | * Assured Forwarding 1 (AF1x) - x3 | |
2287 | * Assured Forwarding 2 (AF2x) - x3 | |
2288 | * Assured Forwarding 3 (AF3x) - x3 | |
2289 | * Assured Forwarding 4 (AF4x) - x3 | |
2290 | * Precedence Class 2 (CS2) | |
2291 | * Precedence Class 3 (CS3) | |
2292 | * Precedence Class 4 (CS4) | |
2293 | * Precedence Class 5 (CS5) | |
2294 | * Precedence Class 6 (CS6) | |
2295 | * Precedence Class 7 (CS7) | |
2296 | * Voice Admit (VA) | |
2297 | * Expedited Forwarding (EF) | |
2298 | ||
2299 | * Total 25 codepoints. | |
2300 | */ | |
2301 | ||
2302 | /* List of traffic classes in RFC 4594: | |
2303 | * (roughly descending order of contended priority) | |
2304 | * (roughly ascending order of uncontended throughput) | |
2305 | * | |
2306 | * Network Control (CS6,CS7) - routing traffic | |
2307 | * Telephony (EF,VA) - aka. VoIP streams | |
2308 | * Signalling (CS5) - VoIP setup | |
2309 | * Multimedia Conferencing (AF4x) - aka. video calls | |
2310 | * Realtime Interactive (CS4) - eg. games | |
2311 | * Multimedia Streaming (AF3x) - eg. YouTube, NetFlix, Twitch | |
2312 | * Broadcast Video (CS3) | |
2313 | * Low Latency Data (AF2x,TOS4) - eg. database | |
2314 | * Ops, Admin, Management (CS2,TOS1) - eg. ssh | |
2315 | * Standard Service (CS0 & unrecognised codepoints) | |
2316 | * High Throughput Data (AF1x,TOS2) - eg. web traffic | |
2317 | * Low Priority Data (CS1) - eg. BitTorrent | |
2318 | ||
2319 | * Total 12 traffic classes. | |
2320 | */ | |
2321 | ||
2322 | static int cake_config_diffserv8(struct Qdisc *sch) | |
2323 | { | |
2324 | /* Pruned list of traffic classes for typical applications: | |
2325 | * | |
2326 | * Network Control (CS6, CS7) | |
2327 | * Minimum Latency (EF, VA, CS5, CS4) | |
2328 | * Interactive Shell (CS2, TOS1) | |
2329 | * Low Latency Transactions (AF2x, TOS4) | |
2330 | * Video Streaming (AF4x, AF3x, CS3) | |
2331 | * Bog Standard (CS0 etc.) | |
2332 | * High Throughput (AF1x, TOS2) | |
2333 | * Background Traffic (CS1) | |
2334 | * | |
2335 | * Total 8 traffic classes. | |
2336 | */ | |
2337 | ||
2338 | struct cake_sched_data *q = qdisc_priv(sch); | |
2339 | u32 mtu = psched_mtu(qdisc_dev(sch)); | |
2340 | u64 rate = q->rate_bps; | |
cbd22f17 | 2341 | u32 quantum = 256; |
83f8fd69 THJ |
2342 | u32 i; |
2343 | ||
2344 | q->tin_cnt = 8; | |
2345 | ||
2346 | /* codepoint to class mapping */ | |
2347 | q->tin_index = diffserv8; | |
2348 | q->tin_order = normal_order; | |
2349 | ||
2350 | /* class characteristics */ | |
2351 | for (i = 0; i < q->tin_cnt; i++) { | |
2352 | struct cake_tin_data *b = &q->tins[i]; | |
2353 | ||
2354 | cake_set_rate(b, rate, mtu, us_to_ns(q->target), | |
2355 | us_to_ns(q->interval)); | |
2356 | ||
cbd22f17 | 2357 | b->tin_quantum = max_t(u16, 1U, quantum); |
83f8fd69 THJ |
2358 | |
2359 | /* calculate next class's parameters */ | |
2360 | rate *= 7; | |
2361 | rate >>= 3; | |
2362 | ||
cbd22f17 KDB |
2363 | quantum *= 7; |
2364 | quantum >>= 3; | |
83f8fd69 THJ |
2365 | } |
2366 | ||
2367 | return 0; | |
2368 | } | |
2369 | ||
2370 | static int cake_config_diffserv4(struct Qdisc *sch) | |
2371 | { | |
2372 | /* Further pruned list of traffic classes for four-class system: | |
2373 | * | |
2374 | * Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4) | |
2375 | * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1) | |
2376 | * Best Effort (CS0, AF1x, TOS2, and those not specified) | |
2377 | * Background Traffic (CS1) | |
2378 | * | |
2379 | * Total 4 traffic classes. | |
2380 | */ | |
2381 | ||
2382 | struct cake_sched_data *q = qdisc_priv(sch); | |
2383 | u32 mtu = psched_mtu(qdisc_dev(sch)); | |
2384 | u64 rate = q->rate_bps; | |
2385 | u32 quantum = 1024; | |
2386 | ||
2387 | q->tin_cnt = 4; | |
2388 | ||
2389 | /* codepoint to class mapping */ | |
2390 | q->tin_index = diffserv4; | |
2391 | q->tin_order = bulk_order; | |
2392 | ||
2393 | /* class characteristics */ | |
2394 | cake_set_rate(&q->tins[0], rate, mtu, | |
2395 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2396 | cake_set_rate(&q->tins[1], rate >> 4, mtu, | |
2397 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2398 | cake_set_rate(&q->tins[2], rate >> 1, mtu, | |
2399 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2400 | cake_set_rate(&q->tins[3], rate >> 2, mtu, | |
2401 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2402 | ||
83f8fd69 | 2403 | /* bandwidth-sharing weights */ |
cbd22f17 KDB |
2404 | q->tins[0].tin_quantum = quantum; |
2405 | q->tins[1].tin_quantum = quantum >> 4; | |
2406 | q->tins[2].tin_quantum = quantum >> 1; | |
2407 | q->tins[3].tin_quantum = quantum >> 2; | |
83f8fd69 THJ |
2408 | |
2409 | return 0; | |
2410 | } | |
2411 | ||
2412 | static int cake_config_diffserv3(struct Qdisc *sch) | |
2413 | { | |
2414 | /* Simplified Diffserv structure with 3 tins. | |
2415 | * Low Priority (CS1) | |
2416 | * Best Effort | |
2417 | * Latency Sensitive (TOS4, VA, EF, CS6, CS7) | |
2418 | */ | |
2419 | struct cake_sched_data *q = qdisc_priv(sch); | |
2420 | u32 mtu = psched_mtu(qdisc_dev(sch)); | |
2421 | u64 rate = q->rate_bps; | |
2422 | u32 quantum = 1024; | |
2423 | ||
2424 | q->tin_cnt = 3; | |
2425 | ||
2426 | /* codepoint to class mapping */ | |
2427 | q->tin_index = diffserv3; | |
2428 | q->tin_order = bulk_order; | |
2429 | ||
2430 | /* class characteristics */ | |
2431 | cake_set_rate(&q->tins[0], rate, mtu, | |
2432 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2433 | cake_set_rate(&q->tins[1], rate >> 4, mtu, | |
2434 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2435 | cake_set_rate(&q->tins[2], rate >> 2, mtu, | |
2436 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2437 | ||
83f8fd69 | 2438 | /* bandwidth-sharing weights */ |
cbd22f17 KDB |
2439 | q->tins[0].tin_quantum = quantum; |
2440 | q->tins[1].tin_quantum = quantum >> 4; | |
2441 | q->tins[2].tin_quantum = quantum >> 2; | |
83f8fd69 THJ |
2442 | |
2443 | return 0; | |
2444 | } | |
2445 | ||
2446 | static void cake_reconfigure(struct Qdisc *sch) | |
2447 | { | |
2448 | struct cake_sched_data *q = qdisc_priv(sch); | |
2449 | int c, ft; | |
2450 | ||
2451 | switch (q->tin_mode) { | |
2452 | case CAKE_DIFFSERV_BESTEFFORT: | |
2453 | ft = cake_config_besteffort(sch); | |
2454 | break; | |
2455 | ||
2456 | case CAKE_DIFFSERV_PRECEDENCE: | |
2457 | ft = cake_config_precedence(sch); | |
2458 | break; | |
2459 | ||
2460 | case CAKE_DIFFSERV_DIFFSERV8: | |
2461 | ft = cake_config_diffserv8(sch); | |
2462 | break; | |
2463 | ||
2464 | case CAKE_DIFFSERV_DIFFSERV4: | |
2465 | ft = cake_config_diffserv4(sch); | |
2466 | break; | |
2467 | ||
2468 | case CAKE_DIFFSERV_DIFFSERV3: | |
2469 | default: | |
2470 | ft = cake_config_diffserv3(sch); | |
2471 | break; | |
2472 | } | |
2473 | ||
046f6fd5 THJ |
2474 | for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) { |
2475 | cake_clear_tin(sch, c); | |
2476 | q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time; | |
2477 | } | |
2478 | ||
2479 | q->rate_ns = q->tins[ft].tin_rate_ns; | |
2480 | q->rate_shft = q->tins[ft].tin_rate_shft; | |
2481 | ||
2482 | if (q->buffer_config_limit) { | |
2483 | q->buffer_limit = q->buffer_config_limit; | |
2484 | } else if (q->rate_bps) { | |
2485 | u64 t = q->rate_bps * q->interval; | |
2486 | ||
2487 | do_div(t, USEC_PER_SEC / 4); | |
2488 | q->buffer_limit = max_t(u32, t, 4U << 20); | |
2489 | } else { | |
2490 | q->buffer_limit = ~0; | |
2491 | } | |
2492 | ||
2493 | sch->flags &= ~TCQ_F_CAN_BYPASS; | |
2494 | ||
2495 | q->buffer_limit = min(q->buffer_limit, | |
2496 | max(sch->limit * psched_mtu(qdisc_dev(sch)), | |
2497 | q->buffer_config_limit)); | |
2498 | } | |
2499 | ||
2500 | static int cake_change(struct Qdisc *sch, struct nlattr *opt, | |
2501 | struct netlink_ext_ack *extack) | |
2502 | { | |
2503 | struct cake_sched_data *q = qdisc_priv(sch); | |
2504 | struct nlattr *tb[TCA_CAKE_MAX + 1]; | |
2505 | int err; | |
2506 | ||
2507 | if (!opt) | |
2508 | return -EINVAL; | |
2509 | ||
8cb08174 JB |
2510 | err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy, |
2511 | extack); | |
046f6fd5 THJ |
2512 | if (err < 0) |
2513 | return err; | |
2514 | ||
ea825115 THJ |
2515 | if (tb[TCA_CAKE_NAT]) { |
2516 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) | |
2517 | q->flow_mode &= ~CAKE_FLOW_NAT_FLAG; | |
2518 | q->flow_mode |= CAKE_FLOW_NAT_FLAG * | |
2519 | !!nla_get_u32(tb[TCA_CAKE_NAT]); | |
2520 | #else | |
2521 | NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT], | |
2522 | "No conntrack support in kernel"); | |
2523 | return -EOPNOTSUPP; | |
2524 | #endif | |
2525 | } | |
2526 | ||
046f6fd5 THJ |
2527 | if (tb[TCA_CAKE_BASE_RATE64]) |
2528 | q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]); | |
2529 | ||
83f8fd69 THJ |
2530 | if (tb[TCA_CAKE_DIFFSERV_MODE]) |
2531 | q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]); | |
2532 | ||
2533 | if (tb[TCA_CAKE_WASH]) { | |
2534 | if (!!nla_get_u32(tb[TCA_CAKE_WASH])) | |
2535 | q->rate_flags |= CAKE_FLAG_WASH; | |
2536 | else | |
2537 | q->rate_flags &= ~CAKE_FLAG_WASH; | |
2538 | } | |
2539 | ||
046f6fd5 | 2540 | if (tb[TCA_CAKE_FLOW_MODE]) |
ea825115 THJ |
2541 | q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) | |
2542 | (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) & | |
2543 | CAKE_FLOW_MASK)); | |
046f6fd5 | 2544 | |
a729b7f0 THJ |
2545 | if (tb[TCA_CAKE_ATM]) |
2546 | q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]); | |
2547 | ||
2548 | if (tb[TCA_CAKE_OVERHEAD]) { | |
2549 | q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]); | |
2550 | q->rate_flags |= CAKE_FLAG_OVERHEAD; | |
2551 | ||
2552 | q->max_netlen = 0; | |
2553 | q->max_adjlen = 0; | |
2554 | q->min_netlen = ~0; | |
2555 | q->min_adjlen = ~0; | |
2556 | } | |
2557 | ||
2558 | if (tb[TCA_CAKE_RAW]) { | |
2559 | q->rate_flags &= ~CAKE_FLAG_OVERHEAD; | |
2560 | ||
2561 | q->max_netlen = 0; | |
2562 | q->max_adjlen = 0; | |
2563 | q->min_netlen = ~0; | |
2564 | q->min_adjlen = ~0; | |
2565 | } | |
2566 | ||
2567 | if (tb[TCA_CAKE_MPU]) | |
2568 | q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]); | |
2569 | ||
046f6fd5 THJ |
2570 | if (tb[TCA_CAKE_RTT]) { |
2571 | q->interval = nla_get_u32(tb[TCA_CAKE_RTT]); | |
2572 | ||
2573 | if (!q->interval) | |
2574 | q->interval = 1; | |
2575 | } | |
2576 | ||
2577 | if (tb[TCA_CAKE_TARGET]) { | |
2578 | q->target = nla_get_u32(tb[TCA_CAKE_TARGET]); | |
2579 | ||
2580 | if (!q->target) | |
2581 | q->target = 1; | |
2582 | } | |
2583 | ||
7298de9c THJ |
2584 | if (tb[TCA_CAKE_AUTORATE]) { |
2585 | if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE])) | |
2586 | q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS; | |
2587 | else | |
2588 | q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS; | |
2589 | } | |
2590 | ||
2591 | if (tb[TCA_CAKE_INGRESS]) { | |
2592 | if (!!nla_get_u32(tb[TCA_CAKE_INGRESS])) | |
2593 | q->rate_flags |= CAKE_FLAG_INGRESS; | |
2594 | else | |
2595 | q->rate_flags &= ~CAKE_FLAG_INGRESS; | |
2596 | } | |
2597 | ||
8b713881 THJ |
2598 | if (tb[TCA_CAKE_ACK_FILTER]) |
2599 | q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]); | |
2600 | ||
046f6fd5 THJ |
2601 | if (tb[TCA_CAKE_MEMORY]) |
2602 | q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]); | |
2603 | ||
2db6dc26 DT |
2604 | if (tb[TCA_CAKE_SPLIT_GSO]) { |
2605 | if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO])) | |
2606 | q->rate_flags |= CAKE_FLAG_SPLIT_GSO; | |
2607 | else | |
2608 | q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO; | |
2609 | } | |
0c850344 | 2610 | |
0b5c7efd | 2611 | if (tb[TCA_CAKE_FWMARK]) { |
eab2fc82 THJ |
2612 | q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]); |
2613 | q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0; | |
0b5c7efd KDB |
2614 | } |
2615 | ||
046f6fd5 THJ |
2616 | if (q->tins) { |
2617 | sch_tree_lock(sch); | |
2618 | cake_reconfigure(sch); | |
2619 | sch_tree_unlock(sch); | |
2620 | } | |
2621 | ||
2622 | return 0; | |
2623 | } | |
2624 | ||
2625 | static void cake_destroy(struct Qdisc *sch) | |
2626 | { | |
2627 | struct cake_sched_data *q = qdisc_priv(sch); | |
2628 | ||
2629 | qdisc_watchdog_cancel(&q->watchdog); | |
2630 | tcf_block_put(q->block); | |
2631 | kvfree(q->tins); | |
2632 | } | |
2633 | ||
2634 | static int cake_init(struct Qdisc *sch, struct nlattr *opt, | |
2635 | struct netlink_ext_ack *extack) | |
2636 | { | |
2637 | struct cake_sched_data *q = qdisc_priv(sch); | |
2638 | int i, j, err; | |
2639 | ||
2640 | sch->limit = 10240; | |
83f8fd69 | 2641 | q->tin_mode = CAKE_DIFFSERV_DIFFSERV3; |
046f6fd5 THJ |
2642 | q->flow_mode = CAKE_FLOW_TRIPLE; |
2643 | ||
2644 | q->rate_bps = 0; /* unlimited by default */ | |
2645 | ||
2646 | q->interval = 100000; /* 100ms default */ | |
2647 | q->target = 5000; /* 5ms: codel RFC argues | |
2648 | * for 5 to 10% of interval | |
2649 | */ | |
2db6dc26 | 2650 | q->rate_flags |= CAKE_FLAG_SPLIT_GSO; |
046f6fd5 THJ |
2651 | q->cur_tin = 0; |
2652 | q->cur_flow = 0; | |
2653 | ||
2654 | qdisc_watchdog_init(&q->watchdog, sch); | |
2655 | ||
2656 | if (opt) { | |
2657 | int err = cake_change(sch, opt, extack); | |
2658 | ||
2659 | if (err) | |
2660 | return err; | |
2661 | } | |
2662 | ||
2663 | err = tcf_block_get(&q->block, &q->filter_list, sch, extack); | |
2664 | if (err) | |
2665 | return err; | |
2666 | ||
2667 | quantum_div[0] = ~0; | |
2668 | for (i = 1; i <= CAKE_QUEUES; i++) | |
2669 | quantum_div[i] = 65535 / i; | |
2670 | ||
329e0989 | 2671 | q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), |
046f6fd5 THJ |
2672 | GFP_KERNEL); |
2673 | if (!q->tins) | |
2674 | goto nomem; | |
2675 | ||
2676 | for (i = 0; i < CAKE_MAX_TINS; i++) { | |
2677 | struct cake_tin_data *b = q->tins + i; | |
2678 | ||
2679 | INIT_LIST_HEAD(&b->new_flows); | |
2680 | INIT_LIST_HEAD(&b->old_flows); | |
2681 | INIT_LIST_HEAD(&b->decaying_flows); | |
2682 | b->sparse_flow_count = 0; | |
2683 | b->bulk_flow_count = 0; | |
2684 | b->decaying_flow_count = 0; | |
2685 | ||
2686 | for (j = 0; j < CAKE_QUEUES; j++) { | |
2687 | struct cake_flow *flow = b->flows + j; | |
2688 | u32 k = j * CAKE_MAX_TINS + i; | |
2689 | ||
2690 | INIT_LIST_HEAD(&flow->flowchain); | |
2691 | cobalt_vars_init(&flow->cvars); | |
2692 | ||
2693 | q->overflow_heap[k].t = i; | |
2694 | q->overflow_heap[k].b = j; | |
2695 | b->overflow_idx[j] = k; | |
2696 | } | |
2697 | } | |
2698 | ||
2699 | cake_reconfigure(sch); | |
2700 | q->avg_peak_bandwidth = q->rate_bps; | |
2701 | q->min_netlen = ~0; | |
2702 | q->min_adjlen = ~0; | |
2703 | return 0; | |
2704 | ||
2705 | nomem: | |
2706 | cake_destroy(sch); | |
2707 | return -ENOMEM; | |
2708 | } | |
2709 | ||
2710 | static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) | |
2711 | { | |
2712 | struct cake_sched_data *q = qdisc_priv(sch); | |
2713 | struct nlattr *opts; | |
2714 | ||
ae0be8de | 2715 | opts = nla_nest_start_noflag(skb, TCA_OPTIONS); |
046f6fd5 THJ |
2716 | if (!opts) |
2717 | goto nla_put_failure; | |
2718 | ||
2719 | if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps, | |
2720 | TCA_CAKE_PAD)) | |
2721 | goto nla_put_failure; | |
2722 | ||
2723 | if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE, | |
2724 | q->flow_mode & CAKE_FLOW_MASK)) | |
2725 | goto nla_put_failure; | |
2726 | ||
2727 | if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval)) | |
2728 | goto nla_put_failure; | |
2729 | ||
2730 | if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target)) | |
2731 | goto nla_put_failure; | |
2732 | ||
2733 | if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit)) | |
2734 | goto nla_put_failure; | |
2735 | ||
7298de9c THJ |
2736 | if (nla_put_u32(skb, TCA_CAKE_AUTORATE, |
2737 | !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS))) | |
2738 | goto nla_put_failure; | |
2739 | ||
2740 | if (nla_put_u32(skb, TCA_CAKE_INGRESS, | |
2741 | !!(q->rate_flags & CAKE_FLAG_INGRESS))) | |
2742 | goto nla_put_failure; | |
2743 | ||
8b713881 THJ |
2744 | if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter)) |
2745 | goto nla_put_failure; | |
2746 | ||
ea825115 THJ |
2747 | if (nla_put_u32(skb, TCA_CAKE_NAT, |
2748 | !!(q->flow_mode & CAKE_FLOW_NAT_FLAG))) | |
2749 | goto nla_put_failure; | |
2750 | ||
83f8fd69 THJ |
2751 | if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode)) |
2752 | goto nla_put_failure; | |
2753 | ||
2754 | if (nla_put_u32(skb, TCA_CAKE_WASH, | |
2755 | !!(q->rate_flags & CAKE_FLAG_WASH))) | |
2756 | goto nla_put_failure; | |
a729b7f0 THJ |
2757 | |
2758 | if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead)) | |
2759 | goto nla_put_failure; | |
2760 | ||
2761 | if (!(q->rate_flags & CAKE_FLAG_OVERHEAD)) | |
2762 | if (nla_put_u32(skb, TCA_CAKE_RAW, 0)) | |
2763 | goto nla_put_failure; | |
2764 | ||
2765 | if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode)) | |
2766 | goto nla_put_failure; | |
2767 | ||
2768 | if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu)) | |
2769 | goto nla_put_failure; | |
0c850344 THJ |
2770 | |
2771 | if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO, | |
2772 | !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO))) | |
2773 | goto nla_put_failure; | |
0b5c7efd | 2774 | |
eab2fc82 | 2775 | if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask)) |
0b5c7efd | 2776 | goto nla_put_failure; |
83f8fd69 | 2777 | |
046f6fd5 THJ |
2778 | return nla_nest_end(skb, opts); |
2779 | ||
2780 | nla_put_failure: | |
2781 | return -1; | |
2782 | } | |
2783 | ||
2784 | static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | |
2785 | { | |
ae0be8de | 2786 | struct nlattr *stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); |
046f6fd5 THJ |
2787 | struct cake_sched_data *q = qdisc_priv(sch); |
2788 | struct nlattr *tstats, *ts; | |
2789 | int i; | |
2790 | ||
2791 | if (!stats) | |
2792 | return -1; | |
2793 | ||
2794 | #define PUT_STAT_U32(attr, data) do { \ | |
2795 | if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \ | |
2796 | goto nla_put_failure; \ | |
2797 | } while (0) | |
2798 | #define PUT_STAT_U64(attr, data) do { \ | |
2799 | if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \ | |
2800 | data, TCA_CAKE_STATS_PAD)) \ | |
2801 | goto nla_put_failure; \ | |
2802 | } while (0) | |
2803 | ||
2804 | PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth); | |
2805 | PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit); | |
2806 | PUT_STAT_U32(MEMORY_USED, q->buffer_max_used); | |
2807 | PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16)); | |
2808 | PUT_STAT_U32(MAX_NETLEN, q->max_netlen); | |
2809 | PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen); | |
2810 | PUT_STAT_U32(MIN_NETLEN, q->min_netlen); | |
2811 | PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen); | |
2812 | ||
2813 | #undef PUT_STAT_U32 | |
2814 | #undef PUT_STAT_U64 | |
2815 | ||
ae0be8de | 2816 | tstats = nla_nest_start_noflag(d->skb, TCA_CAKE_STATS_TIN_STATS); |
046f6fd5 THJ |
2817 | if (!tstats) |
2818 | goto nla_put_failure; | |
2819 | ||
2820 | #define PUT_TSTAT_U32(attr, data) do { \ | |
2821 | if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \ | |
2822 | goto nla_put_failure; \ | |
2823 | } while (0) | |
2824 | #define PUT_TSTAT_U64(attr, data) do { \ | |
2825 | if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \ | |
2826 | data, TCA_CAKE_TIN_STATS_PAD)) \ | |
2827 | goto nla_put_failure; \ | |
2828 | } while (0) | |
2829 | ||
2830 | for (i = 0; i < q->tin_cnt; i++) { | |
83f8fd69 | 2831 | struct cake_tin_data *b = &q->tins[q->tin_order[i]]; |
046f6fd5 | 2832 | |
ae0be8de | 2833 | ts = nla_nest_start_noflag(d->skb, i + 1); |
046f6fd5 THJ |
2834 | if (!ts) |
2835 | goto nla_put_failure; | |
2836 | ||
2837 | PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps); | |
2838 | PUT_TSTAT_U64(SENT_BYTES64, b->bytes); | |
2839 | PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog); | |
2840 | ||
2841 | PUT_TSTAT_U32(TARGET_US, | |
2842 | ktime_to_us(ns_to_ktime(b->cparams.target))); | |
2843 | PUT_TSTAT_U32(INTERVAL_US, | |
2844 | ktime_to_us(ns_to_ktime(b->cparams.interval))); | |
2845 | ||
2846 | PUT_TSTAT_U32(SENT_PACKETS, b->packets); | |
2847 | PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped); | |
2848 | PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark); | |
2849 | PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops); | |
2850 | ||
2851 | PUT_TSTAT_U32(PEAK_DELAY_US, | |
2852 | ktime_to_us(ns_to_ktime(b->peak_delay))); | |
2853 | PUT_TSTAT_U32(AVG_DELAY_US, | |
2854 | ktime_to_us(ns_to_ktime(b->avge_delay))); | |
2855 | PUT_TSTAT_U32(BASE_DELAY_US, | |
2856 | ktime_to_us(ns_to_ktime(b->base_delay))); | |
2857 | ||
2858 | PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits); | |
2859 | PUT_TSTAT_U32(WAY_MISSES, b->way_misses); | |
2860 | PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions); | |
2861 | ||
2862 | PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count + | |
2863 | b->decaying_flow_count); | |
2864 | PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count); | |
2865 | PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count); | |
2866 | PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen); | |
2867 | ||
2868 | PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum); | |
2869 | nla_nest_end(d->skb, ts); | |
2870 | } | |
2871 | ||
2872 | #undef PUT_TSTAT_U32 | |
2873 | #undef PUT_TSTAT_U64 | |
2874 | ||
2875 | nla_nest_end(d->skb, tstats); | |
2876 | return nla_nest_end(d->skb, stats); | |
2877 | ||
2878 | nla_put_failure: | |
2879 | nla_nest_cancel(d->skb, stats); | |
2880 | return -1; | |
2881 | } | |
2882 | ||
2883 | static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg) | |
2884 | { | |
2885 | return NULL; | |
2886 | } | |
2887 | ||
2888 | static unsigned long cake_find(struct Qdisc *sch, u32 classid) | |
2889 | { | |
2890 | return 0; | |
2891 | } | |
2892 | ||
2893 | static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent, | |
2894 | u32 classid) | |
2895 | { | |
2896 | return 0; | |
2897 | } | |
2898 | ||
2899 | static void cake_unbind(struct Qdisc *q, unsigned long cl) | |
2900 | { | |
2901 | } | |
2902 | ||
2903 | static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl, | |
2904 | struct netlink_ext_ack *extack) | |
2905 | { | |
2906 | struct cake_sched_data *q = qdisc_priv(sch); | |
2907 | ||
2908 | if (cl) | |
2909 | return NULL; | |
2910 | return q->block; | |
2911 | } | |
2912 | ||
2913 | static int cake_dump_class(struct Qdisc *sch, unsigned long cl, | |
2914 | struct sk_buff *skb, struct tcmsg *tcm) | |
2915 | { | |
2916 | tcm->tcm_handle |= TC_H_MIN(cl); | |
2917 | return 0; | |
2918 | } | |
2919 | ||
2920 | static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, | |
2921 | struct gnet_dump *d) | |
2922 | { | |
2923 | struct cake_sched_data *q = qdisc_priv(sch); | |
2924 | const struct cake_flow *flow = NULL; | |
2925 | struct gnet_stats_queue qs = { 0 }; | |
2926 | struct nlattr *stats; | |
2927 | u32 idx = cl - 1; | |
2928 | ||
2929 | if (idx < CAKE_QUEUES * q->tin_cnt) { | |
83f8fd69 THJ |
2930 | const struct cake_tin_data *b = \ |
2931 | &q->tins[q->tin_order[idx / CAKE_QUEUES]]; | |
046f6fd5 THJ |
2932 | const struct sk_buff *skb; |
2933 | ||
2934 | flow = &b->flows[idx % CAKE_QUEUES]; | |
2935 | ||
2936 | if (flow->head) { | |
2937 | sch_tree_lock(sch); | |
2938 | skb = flow->head; | |
2939 | while (skb) { | |
2940 | qs.qlen++; | |
2941 | skb = skb->next; | |
2942 | } | |
2943 | sch_tree_unlock(sch); | |
2944 | } | |
2945 | qs.backlog = b->backlogs[idx % CAKE_QUEUES]; | |
2946 | qs.drops = flow->dropped; | |
2947 | } | |
2948 | if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) | |
2949 | return -1; | |
2950 | if (flow) { | |
2951 | ktime_t now = ktime_get(); | |
2952 | ||
ae0be8de | 2953 | stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); |
046f6fd5 THJ |
2954 | if (!stats) |
2955 | return -1; | |
2956 | ||
2957 | #define PUT_STAT_U32(attr, data) do { \ | |
2958 | if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \ | |
2959 | goto nla_put_failure; \ | |
2960 | } while (0) | |
2961 | #define PUT_STAT_S32(attr, data) do { \ | |
2962 | if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \ | |
2963 | goto nla_put_failure; \ | |
2964 | } while (0) | |
2965 | ||
2966 | PUT_STAT_S32(DEFICIT, flow->deficit); | |
2967 | PUT_STAT_U32(DROPPING, flow->cvars.dropping); | |
2968 | PUT_STAT_U32(COBALT_COUNT, flow->cvars.count); | |
2969 | PUT_STAT_U32(P_DROP, flow->cvars.p_drop); | |
2970 | if (flow->cvars.p_drop) { | |
2971 | PUT_STAT_S32(BLUE_TIMER_US, | |
2972 | ktime_to_us( | |
2973 | ktime_sub(now, | |
2974 | flow->cvars.blue_timer))); | |
2975 | } | |
2976 | if (flow->cvars.dropping) { | |
2977 | PUT_STAT_S32(DROP_NEXT_US, | |
2978 | ktime_to_us( | |
2979 | ktime_sub(now, | |
2980 | flow->cvars.drop_next))); | |
2981 | } | |
2982 | ||
2983 | if (nla_nest_end(d->skb, stats) < 0) | |
2984 | return -1; | |
2985 | } | |
2986 | ||
2987 | return 0; | |
2988 | ||
2989 | nla_put_failure: | |
2990 | nla_nest_cancel(d->skb, stats); | |
2991 | return -1; | |
2992 | } | |
2993 | ||
2994 | static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |
2995 | { | |
2996 | struct cake_sched_data *q = qdisc_priv(sch); | |
2997 | unsigned int i, j; | |
2998 | ||
2999 | if (arg->stop) | |
3000 | return; | |
3001 | ||
3002 | for (i = 0; i < q->tin_cnt; i++) { | |
83f8fd69 | 3003 | struct cake_tin_data *b = &q->tins[q->tin_order[i]]; |
046f6fd5 THJ |
3004 | |
3005 | for (j = 0; j < CAKE_QUEUES; j++) { | |
3006 | if (list_empty(&b->flows[j].flowchain) || | |
3007 | arg->count < arg->skip) { | |
3008 | arg->count++; | |
3009 | continue; | |
3010 | } | |
3011 | if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) { | |
3012 | arg->stop = 1; | |
3013 | break; | |
3014 | } | |
3015 | arg->count++; | |
3016 | } | |
3017 | } | |
3018 | } | |
3019 | ||
3020 | static const struct Qdisc_class_ops cake_class_ops = { | |
3021 | .leaf = cake_leaf, | |
3022 | .find = cake_find, | |
3023 | .tcf_block = cake_tcf_block, | |
3024 | .bind_tcf = cake_bind, | |
3025 | .unbind_tcf = cake_unbind, | |
3026 | .dump = cake_dump_class, | |
3027 | .dump_stats = cake_dump_class_stats, | |
3028 | .walk = cake_walk, | |
3029 | }; | |
3030 | ||
3031 | static struct Qdisc_ops cake_qdisc_ops __read_mostly = { | |
3032 | .cl_ops = &cake_class_ops, | |
3033 | .id = "cake", | |
3034 | .priv_size = sizeof(struct cake_sched_data), | |
3035 | .enqueue = cake_enqueue, | |
3036 | .dequeue = cake_dequeue, | |
3037 | .peek = qdisc_peek_dequeued, | |
3038 | .init = cake_init, | |
3039 | .reset = cake_reset, | |
3040 | .destroy = cake_destroy, | |
3041 | .change = cake_change, | |
3042 | .dump = cake_dump, | |
3043 | .dump_stats = cake_dump_stats, | |
3044 | .owner = THIS_MODULE, | |
3045 | }; | |
3046 | ||
3047 | static int __init cake_module_init(void) | |
3048 | { | |
3049 | return register_qdisc(&cake_qdisc_ops); | |
3050 | } | |
3051 | ||
3052 | static void __exit cake_module_exit(void) | |
3053 | { | |
3054 | unregister_qdisc(&cake_qdisc_ops); | |
3055 | } | |
3056 | ||
3057 | module_init(cake_module_init) | |
3058 | module_exit(cake_module_exit) | |
3059 | MODULE_AUTHOR("Jonathan Morton"); | |
3060 | MODULE_LICENSE("Dual BSD/GPL"); | |
3061 | MODULE_DESCRIPTION("The CAKE shaper."); |