// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause

/* COMMON Applications Kept Enhanced (CAKE) discipline
 *
 * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com>
 * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <toke@toke.dk>
 * Copyright (C) 2014-2018 Dave Täht <dave.taht@gmail.com>
 * Copyright (C) 2015-2018 Sebastian Moeller <moeller0@gmx.de>
 * (C) 2015-2018 Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
 * Copyright (C) 2017-2018 Ryan Mounce <ryan@mounce.com.au>
 *
 * The CAKE Principles:
 *		(or, how to have your cake and eat it too)
 *
 * This is a combination of several shaping, AQM and FQ techniques into one
 * easy-to-use package:
 *
 * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE
 *   equipment and bloated MACs. This operates in deficit mode (as in sch_fq),
 *   eliminating the need for any sort of burst parameter (e.g. token bucket
 *   depth). Burst support is limited to that necessary to overcome scheduling
 *   latency.
 *
 * - A Diffserv-aware priority queue, giving more priority to certain classes,
 *   up to a specified fraction of bandwidth. Above that bandwidth threshold,
 *   the priority is reduced to avoid starving other tins.
 *
 * - Each priority tin has a separate Flow Queue system, to isolate traffic
 *   flows from each other. This prevents a burst on one flow from increasing
 *   the delay to another. Flows are distributed to queues using a
 *   set-associative hash function.
 *
 * - Each queue is actively managed by Cobalt, which is a combination of the
 *   Codel and Blue AQM algorithms. This serves flows fairly, and signals
 *   congestion early via ECN (if available) and/or packet drops, to keep
 *   latency low. The codel parameters are auto-tuned based on the bandwidth
 *   setting, as is necessary at low bandwidths.
 *
 * The configuration parameters are kept deliberately simple for ease of use.
 * Everything has sane defaults. Complete generality of configuration is *not*
 * a goal.
 *
 * The priority queue operates according to a weighted DRR scheme, combined
 * with a bandwidth tracker which reuses the shaper logic to detect on which
 * side of the bandwidth sharing threshold the tin is operating. This
 * determines whether a priority-based weight (high) or a bandwidth-based
 * weight (low) is used for that tin in the current pass.
 *
 * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly
 * granted us permission to leverage.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/reciprocal_div.h>
#include <net/netlink.h>
#include <linux/if_vlan.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tcp.h>
#include <net/flow_dissector.h>

#if IS_ENABLED(CONFIG_NF_CONNTRACK)
#include <net/netfilter/nf_conntrack_core.h>
#endif

#define CAKE_SET_WAYS (8)
#define CAKE_MAX_TINS (8)
#define CAKE_QUEUES (1024)
#define CAKE_FLOW_MASK 63
#define CAKE_FLOW_NAT_FLAG 64

/* struct cobalt_params - contains codel and blue parameters
 * @interval:	codel initial drop rate
 * @target:	maximum persistent sojourn time & blue update rate
 * @mtu_time:	serialisation delay of maximum-size packet
 * @p_inc:	increment of blue drop probability (0.32 fxp)
 * @p_dec:	decrement of blue drop probability (0.32 fxp)
 */
struct cobalt_params {
	u64	interval;
	u64	target;
	u64	mtu_time;
	u32	p_inc;
	u32	p_dec;
};

/* struct cobalt_vars - contains codel and blue variables
 * @count:		codel dropping frequency
 * @rec_inv_sqrt:	reciprocal value of sqrt(count) >> 1
 * @drop_next:		time to drop next packet, or when we dropped last
 * @blue_timer:		Blue time to next drop
 * @p_drop:		BLUE drop probability (0.32 fxp)
 * @dropping:		set if in dropping state
 * @ecn_marked:		set if marked
 */
struct cobalt_vars {
	u32	count;
	u32	rec_inv_sqrt;
	ktime_t	drop_next;
	ktime_t	blue_timer;
	u32	p_drop;
	bool	dropping;
	bool	ecn_marked;
};

enum {
	CAKE_SET_NONE = 0,
	CAKE_SET_SPARSE,
	CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */
	CAKE_SET_BULK,
	CAKE_SET_DECAYING
};
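
/* Flow set lifecycle (a sketch, inferred from the set handling below): a
 * queue starts out NONE; it becomes SPARSE when a new flow lands in it and
 * is scheduled from the new_flows list, moves via SPARSE_WAIT to BULK once
 * it builds a standing backlog, and falls back to DECAYING when it empties,
 * retaining its AQM state for a while before returning to NONE.
 */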

struct cake_flow {
	/* this stuff is all needed per-flow at dequeue time */
	struct sk_buff	*head;
	struct sk_buff	*tail;
	struct list_head flowchain;
	s32		deficit;
	u32		dropped;
	struct cobalt_vars cvars;
	u16		srchost; /* index into cake_host table */
	u16		dsthost;
	u8		set;
}; /* please try to keep this structure <= 64 bytes */

struct cake_host {
	u32 srchost_tag;
	u32 dsthost_tag;
	u16 srchost_bulk_flow_count;
	u16 dsthost_bulk_flow_count;
};

struct cake_heap_entry {
	u16 t:3, b:10;
};

struct cake_tin_data {
	struct cake_flow flows[CAKE_QUEUES];
	u32	backlogs[CAKE_QUEUES];
	u32	tags[CAKE_QUEUES]; /* for set association */
	u16	overflow_idx[CAKE_QUEUES];
	struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */
	u16	flow_quantum;

	struct cobalt_params cparams;
	u32	drop_overlimit;
	u16	bulk_flow_count;
	u16	sparse_flow_count;
	u16	decaying_flow_count;
	u16	unresponsive_flow_count;

	u32	max_skblen;

	struct list_head new_flows;
	struct list_head old_flows;
	struct list_head decaying_flows;

	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
	ktime_t	time_next_packet;
	u64	tin_rate_ns;
	u64	tin_rate_bps;
	u16	tin_rate_shft;

	u16	tin_quantum_prio;
	u16	tin_quantum_band;
	s32	tin_deficit;
	u32	tin_backlog;
	u32	tin_dropped;
	u32	tin_ecn_mark;

	u32	packets;
	u64	bytes;

	u32	ack_drops;

	/* moving averages */
	u64	avge_delay;
	u64	peak_delay;
	u64	base_delay;

	/* hash function stats */
	u32	way_directs;
	u32	way_hits;
	u32	way_misses;
	u32	way_collisions;
}; /* number of tins is small, so size of this struct doesn't matter much */

struct cake_sched_data {
	struct tcf_proto __rcu *filter_list; /* optional external classifier */
	struct tcf_block *block;
	struct cake_tin_data *tins;

	struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
	u16		overflow_timeout;

	u16		tin_cnt;
	u8		tin_mode;
	u8		flow_mode;
	u8		ack_filter;
	u8		atm_mode;

	u32		fwmark_mask;
	u16		fwmark_shft;

	/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
	u16		rate_shft;
	ktime_t		time_next_packet;
	ktime_t		failsafe_next_packet;
	u64		rate_ns;
	u64		rate_bps;
	u16		rate_flags;
	s16		rate_overhead;
	u16		rate_mpu;
	u64		interval;
	u64		target;

	/* resource tracking */
	u32		buffer_used;
	u32		buffer_max_used;
	u32		buffer_limit;
	u32		buffer_config_limit;

	/* indices for dequeue */
	u16		cur_tin;
	u16		cur_flow;

	struct qdisc_watchdog watchdog;
	const u8	*tin_index;
	const u8	*tin_order;

	/* bandwidth capacity estimate */
	ktime_t		last_packet_time;
	ktime_t		avg_window_begin;
	u64		avg_packet_interval;
	u64		avg_window_bytes;
	u64		avg_peak_bandwidth;
	ktime_t		last_reconfig_time;

	/* packet length stats */
	u32		avg_netoff;
	u16		max_netlen;
	u16		max_adjlen;
	u16		min_netlen;
	u16		min_adjlen;
};

enum {
	CAKE_FLAG_OVERHEAD	   = BIT(0),
	CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
	CAKE_FLAG_INGRESS	   = BIT(2),
	CAKE_FLAG_WASH		   = BIT(3),
	CAKE_FLAG_SPLIT_GSO	   = BIT(4)
};

/* COBALT operates the Codel and BLUE algorithms in parallel, in order to
 * obtain the best features of each. Codel is excellent on flows which
 * respond to congestion signals in a TCP-like way. BLUE is more effective on
 * unresponsive flows.
 */

struct cobalt_skb_cb {
	ktime_t enqueue_time;
	u32     adjusted_len;
};

static u64 us_to_ns(u64 us)
{
	return us * NSEC_PER_USEC;
}

static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb)
{
	qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb));
	return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data;
}

static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb)
{
	return get_cobalt_cb(skb)->enqueue_time;
}

static void cobalt_set_enqueue_time(struct sk_buff *skb,
				    ktime_t now)
{
	get_cobalt_cb(skb)->enqueue_time = now;
}

static u16 quantum_div[CAKE_QUEUES + 1] = {0};

/* Diffserv lookup tables */

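/* Each table is indexed by the 6-bit DSCP value (0..63) and yields the tin
 * that DSCP maps to. E.g. in diffserv3, CS1 (DSCP 8) selects the bulk tin 1,
 * EF (DSCP 46) the latency-sensitive tin 2, and most other code points the
 * best-effort tin 0.
 */
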
static const u8 precedence[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4,
	5, 5, 5, 5, 5, 5, 5, 5,
	6, 6, 6, 6, 6, 6, 6, 6,
	7, 7, 7, 7, 7, 7, 7, 7,
};

static const u8 diffserv8[] = {
	2, 5, 1, 2, 4, 2, 2, 2,
	0, 2, 1, 2, 1, 2, 1, 2,
	5, 2, 4, 2, 4, 2, 4, 2,
	3, 2, 3, 2, 3, 2, 3, 2,
	6, 2, 3, 2, 3, 2, 3, 2,
	6, 2, 2, 2, 6, 2, 6, 2,
	7, 2, 2, 2, 2, 2, 2, 2,
	7, 2, 2, 2, 2, 2, 2, 2,
};

static const u8 diffserv4[] = {
	0, 2, 0, 0, 2, 0, 0, 0,
	1, 0, 0, 0, 0, 0, 0, 0,
	2, 0, 2, 0, 2, 0, 2, 0,
	2, 0, 2, 0, 2, 0, 2, 0,
	3, 0, 2, 0, 2, 0, 2, 0,
	3, 0, 0, 0, 3, 0, 3, 0,
	3, 0, 0, 0, 0, 0, 0, 0,
	3, 0, 0, 0, 0, 0, 0, 0,
};

static const u8 diffserv3[] = {
	0, 0, 0, 0, 2, 0, 0, 0,
	1, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 2, 0, 2, 0,
	2, 0, 0, 0, 0, 0, 0, 0,
	2, 0, 0, 0, 0, 0, 0, 0,
};

static const u8 besteffort[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
};

/* tin priority order for stats dumping */

static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
static const u8 bulk_order[] = {1, 0, 2, 3};

#define REC_INV_SQRT_CACHE (16)
static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};

/* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
 * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
 *
 * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
 */

static void cobalt_newton_step(struct cobalt_vars *vars)
{
	u32 invsqrt, invsqrt2;
	u64 val;

	invsqrt = vars->rec_inv_sqrt;
	invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
	val = (3LL << 32) - ((u64)vars->count * invsqrt2);

	val >>= 2; /* avoid overflow in following multiply */
	val = (val * invsqrt) >> (32 - 2 + 1);

	vars->rec_inv_sqrt = val;
}
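
/* Worked example: at count == 4 the true value is 1/sqrt(4) = 0.5, i.e.
 * 0x80000000 in Q0.32. Feeding that estimate through the step above gives
 * invsqrt2 = 0.25, val = 3 - 4 * 0.25 = 2.0, then (2.0 / 4) * 0.5 * 2 = 0.5
 * again: the correct value is a fixed point of the iteration, and nearby
 * estimates converge towards it.
 */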

static void cobalt_invsqrt(struct cobalt_vars *vars)
{
	if (vars->count < REC_INV_SQRT_CACHE)
		vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
	else
		cobalt_newton_step(vars);
}

/* There is a big difference in timing between the accurate values placed in
 * the cache and the approximations given by a single Newton step for small
 * count values, particularly when stepping from count 1 to 2 or vice versa.
 * Above 16, a single Newton step gives sufficient accuracy in either
 * direction, given the precision stored.
 *
 * The magnitude of the error when stepping up to count 2 is such as to give
 * the value that *should* have been produced at count 4.
 */

static void cobalt_cache_init(void)
{
	struct cobalt_vars v;

	memset(&v, 0, sizeof(v));
	v.rec_inv_sqrt = ~0U;
	cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;

	for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
		cobalt_newton_step(&v);
		cobalt_newton_step(&v);
		cobalt_newton_step(&v);
		cobalt_newton_step(&v);

		cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
	}
}

static void cobalt_vars_init(struct cobalt_vars *vars)
{
	memset(vars, 0, sizeof(*vars));

	if (!cobalt_rec_inv_sqrt_cache[0]) {
		cobalt_cache_init();
		cobalt_rec_inv_sqrt_cache[0] = ~0;
	}
}

/* CoDel control_law is t + interval/sqrt(count)
 * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
 * both sqrt() and divide operation.
 */
static ktime_t cobalt_control(ktime_t t,
			      u64 interval,
			      u32 rec_inv_sqrt)
{
	return ktime_add_ns(t, reciprocal_scale(interval,
						rec_inv_sqrt));
}
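
/* E.g. at count == 4, rec_inv_sqrt is ~0x80000000 (0.5 in Q0.32), so
 * reciprocal_scale() yields interval / 2: successive signalling events are
 * scheduled sqrt(count) times closer together, as in reference CoDel.
 */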

/* Call this when a packet had to be dropped due to queue overflow. Returns
 * true if the BLUE state was quiescent before but active after this call.
 */
static bool cobalt_queue_full(struct cobalt_vars *vars,
			      struct cobalt_params *p,
			      ktime_t now)
{
	bool up = false;

	if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
		up = !vars->p_drop;
		vars->p_drop += p->p_inc;
		if (vars->p_drop < p->p_inc)
			vars->p_drop = ~0;
		vars->blue_timer = now;
	}
	vars->dropping = true;
	vars->drop_next = now;
	if (!vars->count)
		vars->count = 1;

	return up;
}

/* Call this when the queue was serviced but turned out to be empty. Returns
 * true if the BLUE state was active before but quiescent after this call.
 */
static bool cobalt_queue_empty(struct cobalt_vars *vars,
			       struct cobalt_params *p,
			       ktime_t now)
{
	bool down = false;

	if (vars->p_drop &&
	    ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
		if (vars->p_drop < p->p_dec)
			vars->p_drop = 0;
		else
			vars->p_drop -= p->p_dec;
		vars->blue_timer = now;
		down = !vars->p_drop;
	}
	vars->dropping = false;

	if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
		vars->count--;
		cobalt_invsqrt(vars);
		vars->drop_next = cobalt_control(vars->drop_next,
						 p->interval,
						 vars->rec_inv_sqrt);
	}

	return down;
}

/* Call this with a freshly dequeued packet for possible congestion marking.
 * Returns true as an instruction to drop the packet, false for delivery.
 */
static bool cobalt_should_drop(struct cobalt_vars *vars,
			       struct cobalt_params *p,
			       ktime_t now,
			       struct sk_buff *skb,
			       u32 bulk_flows)
{
	bool next_due, over_target, drop = false;
	ktime_t schedule;
	u64 sojourn;

/* The 'schedule' variable records, in its sign, whether 'now' is before or
 * after 'drop_next'. This allows 'drop_next' to be updated before the next
 * scheduling decision is actually branched, without destroying that
 * information. Similarly, the first 'schedule' value calculated is preserved
 * in the boolean 'next_due'.
 *
 * As for 'drop_next', we take advantage of the fact that 'interval' is both
 * the delay between first exceeding 'target' and the first signalling event,
 * *and* the scaling factor for the signalling frequency. It's therefore very
 * natural to use a single mechanism for both purposes, and eliminates a
 * significant amount of reference Codel's spaghetti code. To help with this,
 * both the '0' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close
 * as possible to 1.0 in fixed-point.
 */

	sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
	schedule = ktime_sub(now, vars->drop_next);
	over_target = sojourn > p->target &&
		      sojourn > p->mtu_time * bulk_flows * 2 &&
		      sojourn > p->mtu_time * 4;
	next_due = vars->count && ktime_to_ns(schedule) >= 0;

	vars->ecn_marked = false;

	if (over_target) {
		if (!vars->dropping) {
			vars->dropping = true;
			vars->drop_next = cobalt_control(now,
							 p->interval,
							 vars->rec_inv_sqrt);
		}
		if (!vars->count)
			vars->count = 1;
	} else if (vars->dropping) {
		vars->dropping = false;
	}

	if (next_due && vars->dropping) {
		/* Use ECN mark if possible, otherwise drop */
		drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));

		vars->count++;
		if (!vars->count)
			vars->count--;
		cobalt_invsqrt(vars);
		vars->drop_next = cobalt_control(vars->drop_next,
						 p->interval,
						 vars->rec_inv_sqrt);
		schedule = ktime_sub(now, vars->drop_next);
	} else {
		while (next_due) {
			vars->count--;
			cobalt_invsqrt(vars);
			vars->drop_next = cobalt_control(vars->drop_next,
							 p->interval,
							 vars->rec_inv_sqrt);
			schedule = ktime_sub(now, vars->drop_next);
			next_due = vars->count && ktime_to_ns(schedule) >= 0;
		}
	}

	/* Simple BLUE implementation. Lack of ECN is deliberate. */
	if (vars->p_drop)
		drop |= (prandom_u32() < vars->p_drop);

	/* Overload the drop_next field as an activity timeout */
	if (!vars->count)
		vars->drop_next = ktime_add_ns(now, p->interval);
	else if (ktime_to_ns(schedule) > 0 && !drop)
		vars->drop_next = now;

	return drop;
}

static void cake_update_flowkeys(struct flow_keys *keys,
				 const struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
	struct nf_conntrack_tuple tuple = {};
	bool rev = !skb->_nfct;

	if (tc_skb_protocol(skb) != htons(ETH_P_IP))
		return;

	if (!nf_ct_get_tuple_skb(&tuple, skb))
		return;

	keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
	keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;

	if (keys->ports.ports) {
		keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
		keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
	}
#endif
}

/* Cake has several subtle multiple bit settings. In these cases you
 * would be matching triple isolate mode as well.
 */

static bool cake_dsrc(int flow_mode)
{
	return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC;
}

static bool cake_ddst(int flow_mode)
{
	return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST;
}

static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
		     int flow_mode, u16 flow_override, u16 host_override)
{
	u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
	u16 reduced_hash, srchost_idx, dsthost_idx;
	struct flow_keys keys, host_keys;

	if (unlikely(flow_mode == CAKE_FLOW_NONE))
		return 0;

	/* If both overrides are set we can skip packet dissection entirely */
	if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
	    (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
		goto skip_hash;

	skb_flow_dissect_flow_keys(skb, &keys,
				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);

	if (flow_mode & CAKE_FLOW_NAT_FLAG)
		cake_update_flowkeys(&keys, skb);

	/* flow_hash_from_keys() sorts the addresses by value, so we have
	 * to preserve their order in a separate data structure to treat
	 * src and dst host addresses as independently selectable.
	 */
	host_keys = keys;
	host_keys.ports.ports     = 0;
	host_keys.basic.ip_proto  = 0;
	host_keys.keyid.keyid     = 0;
	host_keys.tags.flow_label = 0;

	switch (host_keys.control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		host_keys.addrs.v4addrs.src = 0;
		dsthost_hash = flow_hash_from_keys(&host_keys);
		host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
		host_keys.addrs.v4addrs.dst = 0;
		srchost_hash = flow_hash_from_keys(&host_keys);
		break;

	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		memset(&host_keys.addrs.v6addrs.src, 0,
		       sizeof(host_keys.addrs.v6addrs.src));
		dsthost_hash = flow_hash_from_keys(&host_keys);
		host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
		memset(&host_keys.addrs.v6addrs.dst, 0,
		       sizeof(host_keys.addrs.v6addrs.dst));
		srchost_hash = flow_hash_from_keys(&host_keys);
		break;

	default:
		dsthost_hash = 0;
		srchost_hash = 0;
	}

	/* This *must* be after the above switch, since as a
	 * side-effect it sorts the src and dst addresses.
	 */
	if (flow_mode & CAKE_FLOW_FLOWS)
		flow_hash = flow_hash_from_keys(&keys);

skip_hash:
	if (flow_override)
		flow_hash = flow_override - 1;
	if (host_override) {
		dsthost_hash = host_override - 1;
		srchost_hash = host_override - 1;
	}

	if (!(flow_mode & CAKE_FLOW_FLOWS)) {
		if (flow_mode & CAKE_FLOW_SRC_IP)
			flow_hash ^= srchost_hash;

		if (flow_mode & CAKE_FLOW_DST_IP)
			flow_hash ^= dsthost_hash;
	}

	reduced_hash = flow_hash % CAKE_QUEUES;

	/* set-associative hashing */
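	/* The CAKE_QUEUES queues in each tin are treated as (CAKE_QUEUES /
	 * CAKE_SET_WAYS) sets of CAKE_SET_WAYS ways each: outer_hash picks
	 * the base of the set and the ways are probed from inner_hash, so a
	 * colliding flow only ever probes eight candidate queues.
	 */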
	/* fast path if no hash collision (direct lookup succeeds) */
	if (likely(q->tags[reduced_hash] == flow_hash &&
		   q->flows[reduced_hash].set)) {
		q->way_directs++;
	} else {
		u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
		u32 outer_hash = reduced_hash - inner_hash;
		bool allocate_src = false;
		bool allocate_dst = false;
		u32 i, k;

		/* check if any active queue in the set is reserved for
		 * this flow.
		 */
		for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
		     i++, k = (k + 1) % CAKE_SET_WAYS) {
			if (q->tags[outer_hash + k] == flow_hash) {
				if (i)
					q->way_hits++;

				if (!q->flows[outer_hash + k].set) {
					/* need to increment host refcnts */
					allocate_src = cake_dsrc(flow_mode);
					allocate_dst = cake_ddst(flow_mode);
				}

				goto found;
			}
		}

		/* no queue is reserved for this flow, look for an
		 * empty one.
		 */
		for (i = 0; i < CAKE_SET_WAYS;
		     i++, k = (k + 1) % CAKE_SET_WAYS) {
			if (!q->flows[outer_hash + k].set) {
				q->way_misses++;
				allocate_src = cake_dsrc(flow_mode);
				allocate_dst = cake_ddst(flow_mode);
				goto found;
			}
		}

		/* With no empty queues, default to the original
		 * queue, accept the collision, update the host tags.
		 */
		q->way_collisions++;
		if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
			q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
			q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
		}
		allocate_src = cake_dsrc(flow_mode);
		allocate_dst = cake_ddst(flow_mode);
found:
		/* reserve queue for future packets in same flow */
		reduced_hash = outer_hash + k;
		q->tags[reduced_hash] = flow_hash;

		if (allocate_src) {
			srchost_idx = srchost_hash % CAKE_QUEUES;
			inner_hash = srchost_idx % CAKE_SET_WAYS;
			outer_hash = srchost_idx - inner_hash;
			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
			     i++, k = (k + 1) % CAKE_SET_WAYS) {
				if (q->hosts[outer_hash + k].srchost_tag ==
				    srchost_hash)
					goto found_src;
			}
			for (i = 0; i < CAKE_SET_WAYS;
			     i++, k = (k + 1) % CAKE_SET_WAYS) {
				if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
					break;
			}
			q->hosts[outer_hash + k].srchost_tag = srchost_hash;
found_src:
			srchost_idx = outer_hash + k;
			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
				q->hosts[srchost_idx].srchost_bulk_flow_count++;
			q->flows[reduced_hash].srchost = srchost_idx;
		}

		if (allocate_dst) {
			dsthost_idx = dsthost_hash % CAKE_QUEUES;
			inner_hash = dsthost_idx % CAKE_SET_WAYS;
			outer_hash = dsthost_idx - inner_hash;
			for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
			     i++, k = (k + 1) % CAKE_SET_WAYS) {
				if (q->hosts[outer_hash + k].dsthost_tag ==
				    dsthost_hash)
					goto found_dst;
			}
			for (i = 0; i < CAKE_SET_WAYS;
			     i++, k = (k + 1) % CAKE_SET_WAYS) {
				if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
					break;
			}
			q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
found_dst:
			dsthost_idx = outer_hash + k;
			if (q->flows[reduced_hash].set == CAKE_SET_BULK)
				q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
			q->flows[reduced_hash].dsthost = dsthost_idx;
		}
	}

	return reduced_hash;
}

/* helper functions : might be changed when/if skb use a standard list_head */
/* remove one skb from head of slot queue */

static struct sk_buff *dequeue_head(struct cake_flow *flow)
{
	struct sk_buff *skb = flow->head;

	if (skb) {
		flow->head = skb->next;
		skb_mark_not_on_list(skb);
	}

	return skb;
}

/* add skb to flow queue (tail add) */

static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb)
{
	if (!flow->head)
		flow->head = skb;
	else
		flow->tail->next = skb;
	flow->tail = skb;
	skb->next = NULL;
}

static struct iphdr *cake_get_iphdr(const struct sk_buff *skb,
				    struct ipv6hdr *buf)
{
	unsigned int offset = skb_network_offset(skb);
	struct iphdr *iph;

	iph = skb_header_pointer(skb, offset, sizeof(struct iphdr), buf);

	if (!iph)
		return NULL;

	if (iph->version == 4 && iph->protocol == IPPROTO_IPV6)
		return skb_header_pointer(skb, offset + iph->ihl * 4,
					  sizeof(struct ipv6hdr), buf);

	else if (iph->version == 4)
		return iph;

	else if (iph->version == 6)
		return skb_header_pointer(skb, offset, sizeof(struct ipv6hdr),
					  buf);

	return NULL;
}

static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
				      void *buf, unsigned int bufsize)
{
	unsigned int offset = skb_network_offset(skb);
	const struct ipv6hdr *ipv6h;
	const struct tcphdr *tcph;
	const struct iphdr *iph;
	struct ipv6hdr _ipv6h;
	struct tcphdr _tcph;

	ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);

	if (!ipv6h)
		return NULL;

	if (ipv6h->version == 4) {
		iph = (struct iphdr *)ipv6h;
		offset += iph->ihl * 4;

		/* special-case 6in4 tunnelling, as that is a common way to get
		 * v6 connectivity in the home
		 */
		if (iph->protocol == IPPROTO_IPV6) {
			ipv6h = skb_header_pointer(skb, offset,
						   sizeof(_ipv6h), &_ipv6h);

			if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
				return NULL;

			offset += sizeof(struct ipv6hdr);

		} else if (iph->protocol != IPPROTO_TCP) {
			return NULL;
		}

	} else if (ipv6h->version == 6) {
		if (ipv6h->nexthdr != IPPROTO_TCP)
			return NULL;

		offset += sizeof(struct ipv6hdr);
	} else {
		return NULL;
	}

	tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
	if (!tcph)
		return NULL;

	return skb_header_pointer(skb, offset,
				  min(__tcp_hdrlen(tcph), bufsize), buf);
}

static const void *cake_get_tcpopt(const struct tcphdr *tcph,
				   int code, int *oplen)
{
	/* inspired by tcp_parse_options in tcp_input.c */
	int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
	const u8 *ptr = (const u8 *)(tcph + 1);

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		if (opcode == TCPOPT_EOL)
			break;
		if (opcode == TCPOPT_NOP) {
			length--;
			continue;
		}
		opsize = *ptr++;
		if (opsize < 2 || opsize > length)
			break;

		if (opcode == code) {
			*oplen = opsize;
			return ptr;
		}

		ptr += opsize - 2;
		length -= opsize;
	}

	return NULL;
}

/* Compare two SACK sequences. A sequence is considered greater if it SACKs
 * more bytes than the other. In the case where both sequences ACK bytes that
 * the other doesn't, A is considered greater. DSACKs in A also make A
 * considered greater.
 *
 * @return -1, 0 or 1 as normal compare functions
 */
static int cake_tcph_sack_compare(const struct tcphdr *tcph_a,
				  const struct tcphdr *tcph_b)
{
	const struct tcp_sack_block_wire *sack_a, *sack_b;
	u32 ack_seq_a = ntohl(tcph_a->ack_seq);
	u32 bytes_a = 0, bytes_b = 0;
	int oplen_a, oplen_b;
	bool first = true;

	sack_a = cake_get_tcpopt(tcph_a, TCPOPT_SACK, &oplen_a);
	sack_b = cake_get_tcpopt(tcph_b, TCPOPT_SACK, &oplen_b);

	/* pointers point to option contents */
	oplen_a -= TCPOLEN_SACK_BASE;
	oplen_b -= TCPOLEN_SACK_BASE;

	if (sack_a && oplen_a >= sizeof(*sack_a) &&
	    (!sack_b || oplen_b < sizeof(*sack_b)))
		return -1;
	else if (sack_b && oplen_b >= sizeof(*sack_b) &&
		 (!sack_a || oplen_a < sizeof(*sack_a)))
		return 1;
	else if ((!sack_a || oplen_a < sizeof(*sack_a)) &&
		 (!sack_b || oplen_b < sizeof(*sack_b)))
		return 0;

	while (oplen_a >= sizeof(*sack_a)) {
		const struct tcp_sack_block_wire *sack_tmp = sack_b;
		u32 start_a = get_unaligned_be32(&sack_a->start_seq);
		u32 end_a = get_unaligned_be32(&sack_a->end_seq);
		int oplen_tmp = oplen_b;
		bool found = false;

		/* DSACK; always considered greater to prevent dropping */
		if (before(start_a, ack_seq_a))
			return -1;

		bytes_a += end_a - start_a;

		while (oplen_tmp >= sizeof(*sack_tmp)) {
			u32 start_b = get_unaligned_be32(&sack_tmp->start_seq);
			u32 end_b = get_unaligned_be32(&sack_tmp->end_seq);

			/* first time through we count the total size */
			if (first)
				bytes_b += end_b - start_b;

			if (!after(start_b, start_a) && !before(end_b, end_a)) {
				found = true;
				if (!first)
					break;
			}
			oplen_tmp -= sizeof(*sack_tmp);
			sack_tmp++;
		}

		if (!found)
			return -1;

		oplen_a -= sizeof(*sack_a);
		sack_a++;
		first = false;
	}

	/* If we made it this far, all ranges SACKed by A are covered by B, so
	 * either the SACKs are equal, or B SACKs more bytes.
	 */
	return bytes_b > bytes_a ? 1 : 0;
}

static void cake_tcph_get_tstamp(const struct tcphdr *tcph,
				 u32 *tsval, u32 *tsecr)
{
	const u8 *ptr;
	int opsize;

	ptr = cake_get_tcpopt(tcph, TCPOPT_TIMESTAMP, &opsize);

	if (ptr && opsize == TCPOLEN_TIMESTAMP) {
		*tsval = get_unaligned_be32(ptr);
		*tsecr = get_unaligned_be32(ptr + 4);
	}
}

static bool cake_tcph_may_drop(const struct tcphdr *tcph,
			       u32 tstamp_new, u32 tsecr_new)
{
	/* inspired by tcp_parse_options in tcp_input.c */
	int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
	const u8 *ptr = (const u8 *)(tcph + 1);
	u32 tstamp, tsecr;

	/* 3 reserved flags must be unset to avoid future breakage
	 * ACK must be set
	 * ECE/CWR are handled separately
	 * All other flags URG/PSH/RST/SYN/FIN must be unset
	 * 0x0FFF0000 = all TCP flags (confirm ACK=1, others zero)
	 * 0x00C00000 = CWR/ECE (handled separately)
	 * 0x0F3F0000 = 0x0FFF0000 & ~0x00C00000
	 */
	if (((tcp_flag_word(tcph) &
	      cpu_to_be32(0x0F3F0000)) != TCP_FLAG_ACK))
		return false;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		if (opcode == TCPOPT_EOL)
			break;
		if (opcode == TCPOPT_NOP) {
			length--;
			continue;
		}
		opsize = *ptr++;
		if (opsize < 2 || opsize > length)
			break;

		switch (opcode) {
		case TCPOPT_MD5SIG: /* doesn't influence state */
			break;

		case TCPOPT_SACK: /* stricter checking performed later */
			if (opsize % 8 != 2)
				return false;
			break;

		case TCPOPT_TIMESTAMP:
			/* only drop timestamps lower than new */
			if (opsize != TCPOLEN_TIMESTAMP)
				return false;
			tstamp = get_unaligned_be32(ptr);
			tsecr = get_unaligned_be32(ptr + 4);
			if (after(tstamp, tstamp_new) ||
			    after(tsecr, tsecr_new))
				return false;
			break;

		case TCPOPT_MSS: /* these should only be set on SYN */
		case TCPOPT_WINDOW:
		case TCPOPT_SACK_PERM:
		case TCPOPT_FASTOPEN:
		case TCPOPT_EXP:
		default: /* don't drop if any unknown options are present */
			return false;
		}

		ptr += opsize - 2;
		length -= opsize;
	}

	return true;
}

static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
				       struct cake_flow *flow)
{
	bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE;
	struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL;
	struct sk_buff *skb_check, *skb_prev = NULL;
	const struct ipv6hdr *ipv6h, *ipv6h_check;
	unsigned char _tcph[64], _tcph_check[64];
	const struct tcphdr *tcph, *tcph_check;
	const struct iphdr *iph, *iph_check;
	struct ipv6hdr _iph, _iph_check;
	const struct sk_buff *skb;
	int seglen, num_found = 0;
	u32 tstamp = 0, tsecr = 0;
	__be32 elig_flags = 0;
	int sack_comp;

	/* no other possible ACKs to filter */
	if (flow->head == flow->tail)
		return NULL;

	skb = flow->tail;
	tcph = cake_get_tcphdr(skb, _tcph, sizeof(_tcph));
	iph = cake_get_iphdr(skb, &_iph);
	if (!tcph)
		return NULL;

	cake_tcph_get_tstamp(tcph, &tstamp, &tsecr);

	/* the 'triggering' packet need only have the ACK flag set.
	 * also check that SYN is not set, as there won't be any previous ACKs.
	 */
	if ((tcp_flag_word(tcph) &
	     (TCP_FLAG_ACK | TCP_FLAG_SYN)) != TCP_FLAG_ACK)
		return NULL;

	/* the 'triggering' ACK is at the tail of the queue, we have already
	 * returned if it is the only packet in the flow. loop through the rest
	 * of the queue looking for pure ACKs with the same 5-tuple as the
	 * triggering one.
	 */
	for (skb_check = flow->head;
	     skb_check && skb_check != skb;
	     skb_prev = skb_check, skb_check = skb_check->next) {
		iph_check = cake_get_iphdr(skb_check, &_iph_check);
		tcph_check = cake_get_tcphdr(skb_check, &_tcph_check,
					     sizeof(_tcph_check));

		/* only TCP packets with matching 5-tuple are eligible, and only
		 * drop safe headers
		 */
		if (!tcph_check || iph->version != iph_check->version ||
		    tcph_check->source != tcph->source ||
		    tcph_check->dest != tcph->dest)
			continue;

		if (iph_check->version == 4) {
			if (iph_check->saddr != iph->saddr ||
			    iph_check->daddr != iph->daddr)
				continue;

			seglen = ntohs(iph_check->tot_len) -
				       (4 * iph_check->ihl);
		} else if (iph_check->version == 6) {
			ipv6h = (struct ipv6hdr *)iph;
			ipv6h_check = (struct ipv6hdr *)iph_check;

			if (ipv6_addr_cmp(&ipv6h_check->saddr, &ipv6h->saddr) ||
			    ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr))
				continue;

			seglen = ntohs(ipv6h_check->payload_len);
		} else {
			WARN_ON(1);  /* shouldn't happen */
			continue;
		}

		/* If the ECE/CWR flags changed from the previous eligible
		 * packet in the same flow, we should no longer be dropping that
		 * previous packet as this would lose information.
		 */
		if (elig_ack && (tcp_flag_word(tcph_check) &
				 (TCP_FLAG_ECE | TCP_FLAG_CWR)) != elig_flags) {
			elig_ack = NULL;
			elig_ack_prev = NULL;
			num_found--;
		}

		/* Check TCP options and flags, don't drop ACKs with segment
		 * data, and don't drop ACKs with a higher cumulative ACK
		 * counter than the triggering packet. Check ACK seqno here to
		 * avoid parsing SACK options of packets we are going to exclude
		 * anyway.
		 */
		if (!cake_tcph_may_drop(tcph_check, tstamp, tsecr) ||
		    (seglen - __tcp_hdrlen(tcph_check)) != 0 ||
		    after(ntohl(tcph_check->ack_seq), ntohl(tcph->ack_seq)))
			continue;

		/* Check SACK options. The triggering packet must SACK more data
		 * than the ACK under consideration, or SACK the same range but
		 * have a larger cumulative ACK counter. The latter is a
		 * pathological case, but is contained in the following check
		 * anyway, just to be safe.
		 */
		sack_comp = cake_tcph_sack_compare(tcph_check, tcph);

		if (sack_comp < 0 ||
		    (ntohl(tcph_check->ack_seq) == ntohl(tcph->ack_seq) &&
		     sack_comp == 0))
			continue;

		/* At this point we have found an eligible pure ACK to drop; if
		 * we are in aggressive mode, we are done. Otherwise, keep
		 * searching unless this is the second eligible ACK we
		 * found.
		 *
		 * Since we want to drop ACK closest to the head of the queue,
		 * save the first eligible ACK we find, even if we need to loop
		 * again.
		 */
		if (!elig_ack) {
			elig_ack = skb_check;
			elig_ack_prev = skb_prev;
			elig_flags = (tcp_flag_word(tcph_check)
				      & (TCP_FLAG_ECE | TCP_FLAG_CWR));
		}

		if (num_found++ > 0)
			goto found;
	}

	/* We made it through the queue without finding two eligible ACKs. If
	 * we found a single eligible ACK we can drop it in aggressive mode if
	 * we can guarantee that this does not interfere with ECN flag
	 * information. We ensure this by dropping it only if the enqueued
	 * packet is consecutive with the eligible ACK, and their flags match.
	 */
	if (elig_ack && aggressive && elig_ack->next == skb &&
	    (elig_flags == (tcp_flag_word(tcph) &
			    (TCP_FLAG_ECE | TCP_FLAG_CWR))))
		goto found;

	return NULL;

found:
	if (elig_ack_prev)
		elig_ack_prev->next = elig_ack->next;
	else
		flow->head = elig_ack->next;

	skb_mark_not_on_list(elig_ack);

	return elig_ack;
}

static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
{
	avg -= avg >> shift;
	avg += sample >> shift;
	return avg;
}
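
/* Up to rounding, this is avg += (sample - avg) >> shift: an EWMA whose
 * smoothing weight is 2^-shift, so e.g. shift == 8 gives each new sample a
 * 1/256 (~0.4%) influence on the average.
 */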

static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
{
	if (q->rate_flags & CAKE_FLAG_OVERHEAD)
		len -= off;

	if (q->max_netlen < len)
		q->max_netlen = len;
	if (q->min_netlen > len)
		q->min_netlen = len;

	len += q->rate_overhead;

	if (len < q->rate_mpu)
		len = q->rate_mpu;

	if (q->atm_mode == CAKE_ATM_ATM) {
		len += 47;
		len /= 48;
		len *= 53;
	} else if (q->atm_mode == CAKE_ATM_PTM) {
		/* Add one byte per 64 bytes or part thereof.
		 * This is conservative and easier to calculate than the
		 * precise value.
		 */
		len += (len + 63) / 64;
	}
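
	/* Worked example: with ATM framing a 100-byte packet occupies
	 * ceil(100 / 48) = 3 cells, so the calculation above yields
	 * (100 + 47) / 48 * 53 = 159 bytes on the wire.
	 */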

	if (q->max_adjlen < len)
		q->max_adjlen = len;
	if (q->min_adjlen > len)
		q->min_adjlen = len;

	return len;
}

static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned int hdr_len, last_len = 0;
	u32 off = skb_network_offset(skb);
	u32 len = qdisc_pkt_len(skb);
	u16 segs = 1;

	q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);

	if (!shinfo->gso_size)
		return cake_calc_overhead(q, len, off);

	/* borrowed from qdisc_pkt_len_init() */
	hdr_len = skb_transport_header(skb) - skb_mac_header(skb);

	/* + transport layer */
	if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
				       SKB_GSO_TCPV6))) {
		const struct tcphdr *th;
		struct tcphdr _tcphdr;

		th = skb_header_pointer(skb, skb_transport_offset(skb),
					sizeof(_tcphdr), &_tcphdr);
		if (likely(th))
			hdr_len += __tcp_hdrlen(th);
	} else {
		struct udphdr _udphdr;

		if (skb_header_pointer(skb, skb_transport_offset(skb),
				       sizeof(_udphdr), &_udphdr))
			hdr_len += sizeof(struct udphdr);
	}

	if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
		segs = DIV_ROUND_UP(skb->len - hdr_len,
				    shinfo->gso_size);
	else
		segs = shinfo->gso_segs;

	len = shinfo->gso_size + hdr_len;
	last_len = skb->len - shinfo->gso_size * (segs - 1);

	return (cake_calc_overhead(q, len, off) * (segs - 1) +
		cake_calc_overhead(q, last_len, off));
}

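/* The overflow heap is a binary max-heap over every (tin, flow) pair, keyed
 * on per-queue backlog; cake_drop() below uses it to find the longest queue
 * to prune when the buffer fills. overflow_idx[] tracks each queue's heap
 * position so backlog changes can be sifted up or down in place.
 */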
static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
{
	struct cake_heap_entry ii = q->overflow_heap[i];
	struct cake_heap_entry jj = q->overflow_heap[j];

	q->overflow_heap[i] = jj;
	q->overflow_heap[j] = ii;

	q->tins[ii.t].overflow_idx[ii.b] = j;
	q->tins[jj.t].overflow_idx[jj.b] = i;
}

static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i)
{
	struct cake_heap_entry ii = q->overflow_heap[i];

	return q->tins[ii.t].backlogs[ii.b];
}

static void cake_heapify(struct cake_sched_data *q, u16 i)
{
	static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES;
	u32 mb = cake_heap_get_backlog(q, i);
	u32 m = i;

	while (m < a) {
		u32 l = m + m + 1;
		u32 r = l + 1;

		if (l < a) {
			u32 lb = cake_heap_get_backlog(q, l);

			if (lb > mb) {
				m  = l;
				mb = lb;
			}
		}

		if (r < a) {
			u32 rb = cake_heap_get_backlog(q, r);

			if (rb > mb) {
				m  = r;
				mb = rb;
			}
		}

		if (m != i) {
			cake_heap_swap(q, i, m);
			i = m;
		} else {
			break;
		}
	}
}

static void cake_heapify_up(struct cake_sched_data *q, u16 i)
{
	while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) {
		u16 p = (i - 1) >> 1;
		u32 ib = cake_heap_get_backlog(q, i);
		u32 pb = cake_heap_get_backlog(q, p);

		if (ib > pb) {
			cake_heap_swap(q, i, p);
			i = p;
		} else {
			break;
		}
	}
}

static int cake_advance_shaper(struct cake_sched_data *q,
			       struct cake_tin_data *b,
			       struct sk_buff *skb,
			       ktime_t now, bool drop)
{
	u32 len = get_cobalt_cb(skb)->adjusted_len;

	/* charge packet bandwidth to this tin
	 * and to the global shaper.
	 */
	if (q->rate_ns) {
		u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft;
		u64 global_dur = (len * q->rate_ns) >> q->rate_shft;
		u64 failsafe_dur = global_dur + (global_dur >> 1);

		if (ktime_before(b->time_next_packet, now))
			b->time_next_packet = ktime_add_ns(b->time_next_packet,
							   tin_dur);

		else if (ktime_before(b->time_next_packet,
				      ktime_add_ns(now, tin_dur)))
			b->time_next_packet = ktime_add_ns(now, tin_dur);

		q->time_next_packet = ktime_add_ns(q->time_next_packet,
						   global_dur);
		if (!drop)
			q->failsafe_next_packet =
				ktime_add_ns(q->failsafe_next_packet,
					     failsafe_dur);
	}
	return len;
}

static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
{
	struct cake_sched_data *q = qdisc_priv(sch);
	ktime_t now = ktime_get();
	u32 idx = 0, tin = 0, len;
	struct cake_heap_entry qq;
	struct cake_tin_data *b;
	struct cake_flow *flow;
	struct sk_buff *skb;

	if (!q->overflow_timeout) {
		int i;

		/* Build fresh max-heap */
		for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
			cake_heapify(q, i);
	}
	q->overflow_timeout = 65535;

	/* select longest queue for pruning */
	qq = q->overflow_heap[0];
	tin = qq.t;
	idx = qq.b;

	b = &q->tins[tin];
	flow = &b->flows[idx];
	skb = dequeue_head(flow);
	if (unlikely(!skb)) {
		/* heap has gone wrong, rebuild it next time */
		q->overflow_timeout = 0;
		return idx + (tin << 16);
	}

	if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
		b->unresponsive_flow_count++;

	len = qdisc_pkt_len(skb);
	q->buffer_used      -= skb->truesize;
	b->backlogs[idx]    -= len;
	b->tin_backlog      -= len;
	sch->qstats.backlog -= len;
	qdisc_tree_reduce_backlog(sch, 1, len);

	flow->dropped++;
	b->tin_dropped++;
	sch->qstats.drops++;

	if (q->rate_flags & CAKE_FLAG_INGRESS)
		cake_advance_shaper(q, b, skb, now, true);

	__qdisc_drop(skb, to_free);
	sch->q.qlen--;

	cake_heapify(q, 0);

	return idx + (tin << 16);
}

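/* Extract the DSCP from the packet's IP header. If 'wash' is set, the
 * Diffserv field is also cleared on the way past (the ECN bits are
 * preserved). ARP is mapped to CS7 (network control) so it is never
 * starved; packets without a Diffserv field are treated as best-effort.
 */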
static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
{
	int wlen = skb_network_offset(skb);
	u8 dscp;

	switch (tc_skb_protocol(skb)) {
	case htons(ETH_P_IP):
		wlen += sizeof(struct iphdr);
		if (!pskb_may_pull(skb, wlen) ||
		    skb_try_make_writable(skb, wlen))
			return 0;

		dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
		if (wash && dscp)
			ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
		return dscp;

	case htons(ETH_P_IPV6):
		wlen += sizeof(struct ipv6hdr);
		if (!pskb_may_pull(skb, wlen) ||
		    skb_try_make_writable(skb, wlen))
			return 0;

		dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
		if (wash && dscp)
			ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
		return dscp;

	case htons(ETH_P_ARP):
		return 0x38;  /* CS7 - Net Control */

	default:
		/* If there is no Diffserv field, treat as best-effort */
		return 0;
	}
}

static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
					     struct sk_buff *skb)
{
	struct cake_sched_data *q = qdisc_priv(sch);
	u32 tin, mark;
	u8 dscp;

	/* Tin selection: Default to diffserv-based selection, allow overriding
	 * using firewall marks or skb->priority.
	 */
	dscp = cake_handle_diffserv(skb,
				    q->rate_flags & CAKE_FLAG_WASH);
	mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;

	if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
		tin = 0;

	else if (mark && mark <= q->tin_cnt)
		tin = q->tin_order[mark - 1];

	else if (TC_H_MAJ(skb->priority) == sch->handle &&
		 TC_H_MIN(skb->priority) > 0 &&
		 TC_H_MIN(skb->priority) <= q->tin_cnt)
		tin = q->tin_order[TC_H_MIN(skb->priority) - 1];

	else {
		tin = q->tin_index[dscp];

		if (unlikely(tin >= q->tin_cnt))
			tin = 0;
	}

	return &q->tins[tin];
}

static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
			 struct sk_buff *skb, int flow_mode, int *qerr)
{
	struct cake_sched_data *q = qdisc_priv(sch);
	struct tcf_proto *filter;
	struct tcf_result res;
	u16 flow = 0, host = 0;
	int result;

	filter = rcu_dereference_bh(q->filter_list);
	if (!filter)
		goto hash;

	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	result = tcf_classify(skb, filter, &res, false);

	if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
		switch (result) {
		case TC_ACT_STOLEN:
		case TC_ACT_QUEUED:
		case TC_ACT_TRAP:
			*qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
			/* fall through */
		case TC_ACT_SHOT:
			return 0;
		}
#endif
1618 | if (TC_H_MIN(res.classid) <= CAKE_QUEUES) | |
83f8fd69 | 1619 | flow = TC_H_MIN(res.classid); |
93cfb6c1 THJ |
1620 | if (TC_H_MAJ(res.classid) <= (CAKE_QUEUES << 16)) |
1621 | host = TC_H_MAJ(res.classid) >> 16; | |
046f6fd5 | 1622 | } |
83f8fd69 THJ |
1623 | hash: |
1624 | *t = cake_select_tin(sch, skb); | |
93cfb6c1 | 1625 | return cake_hash(*t, skb, flow_mode, flow, host) + 1; |
046f6fd5 THJ |
1626 | } |
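/* Note on the return convention above: 0 means "consumed or dropped by a
 * tc filter action", so the hashed queue index is returned offset by one.
 * A sketch of the caller-side decoding, matching cake_enqueue() below:
 */
#if 0 /* example only */
	idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
	if (idx == 0)
		return ret;	/* packet already freed by the action */
	idx--;			/* now a valid index into b->flows[] */
#endif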
1627 | ||
7298de9c THJ |
1628 | static void cake_reconfigure(struct Qdisc *sch); |
1629 | ||
046f6fd5 THJ |
1630 | static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, |
1631 | struct sk_buff **to_free) | |
1632 | { | |
1633 | struct cake_sched_data *q = qdisc_priv(sch); | |
1634 | int len = qdisc_pkt_len(skb); | |
1635 | int uninitialized_var(ret); | |
8b713881 | 1636 | struct sk_buff *ack = NULL; |
046f6fd5 THJ |
1637 | ktime_t now = ktime_get(); |
1638 | struct cake_tin_data *b; | |
1639 | struct cake_flow *flow; | |
83f8fd69 | 1640 | u32 idx; |
046f6fd5 THJ |
1641 | |
1642 | /* choose flow to insert into */ | |
83f8fd69 | 1643 | idx = cake_classify(sch, &b, skb, q->flow_mode, &ret); |
046f6fd5 THJ |
1644 | if (idx == 0) { |
1645 | if (ret & __NET_XMIT_BYPASS) | |
1646 | qdisc_qstats_drop(sch); | |
1647 | __qdisc_drop(skb, to_free); | |
1648 | return ret; | |
1649 | } | |
1650 | idx--; | |
1651 | flow = &b->flows[idx]; | |
1652 | ||
1653 | /* ensure shaper state isn't stale */ | |
1654 | if (!b->tin_backlog) { | |
1655 | if (ktime_before(b->time_next_packet, now)) | |
1656 | b->time_next_packet = now; | |
1657 | ||
1658 | if (!sch->q.qlen) { | |
1659 | if (ktime_before(q->time_next_packet, now)) { | |
1660 | q->failsafe_next_packet = now; | |
1661 | q->time_next_packet = now; | |
1662 | } else if (ktime_after(q->time_next_packet, now) && | |
1663 | ktime_after(q->failsafe_next_packet, now)) { | |
1664 | u64 next = | |
1665 | min(ktime_to_ns(q->time_next_packet), | |
1666 | ktime_to_ns( | |
1667 | q->failsafe_next_packet)); | |
1668 | sch->qstats.overlimits++; | |
1669 | qdisc_watchdog_schedule_ns(&q->watchdog, next); | |
1670 | } | |
1671 | } | |
1672 | } | |
1673 | ||
1674 | if (unlikely(len > b->max_skblen)) | |
1675 | b->max_skblen = len; | |
1676 | ||
0c850344 THJ |
1677 | if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) { |
1678 | struct sk_buff *segs, *nskb; | |
1679 | netdev_features_t features = netif_skb_features(skb); | |
8c6c37fd | 1680 | unsigned int slen = 0, numsegs = 0; |
0c850344 THJ |
1681 | |
1682 | segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); | |
1683 | if (IS_ERR_OR_NULL(segs)) | |
1684 | return qdisc_drop(skb, sch, to_free); | |
1685 | ||
1686 | while (segs) { | |
1687 | nskb = segs->next; | |
a8305bff | 1688 | skb_mark_not_on_list(segs); |
0c850344 THJ |
1689 | qdisc_skb_cb(segs)->pkt_len = segs->len; |
1690 | cobalt_set_enqueue_time(segs, now); | |
1691 | get_cobalt_cb(segs)->adjusted_len = cake_overhead(q, | |
1692 | segs); | |
1693 | flow_queue_add(flow, segs); | |
1694 | ||
1695 | sch->q.qlen++; | |
8c6c37fd | 1696 | numsegs++; |
0c850344 THJ |
1697 | slen += segs->len; |
1698 | q->buffer_used += segs->truesize; | |
1699 | b->packets++; | |
1700 | segs = nskb; | |
1701 | } | |
8b713881 | 1702 | |
0c850344 THJ |
1703 | /* stats */ |
1704 | b->bytes += slen; | |
1705 | b->backlogs[idx] += slen; | |
1706 | b->tin_backlog += slen; | |
1707 | sch->qstats.backlog += slen; | |
1708 | q->avg_window_bytes += slen; | |
8b713881 | 1709 | |
8c6c37fd | 1710 | qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen); | |
0c850344 | 1711 | consume_skb(skb); |
8b713881 | 1712 | } else { |
0c850344 THJ |
1713 | /* not splitting */ |
1714 | cobalt_set_enqueue_time(skb, now); | |
1715 | get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb); | |
1716 | flow_queue_add(flow, skb); | |
1717 | ||
1718 | if (q->ack_filter) | |
1719 | ack = cake_ack_filter(q, flow); | |
1720 | ||
1721 | if (ack) { | |
1722 | b->ack_drops++; | |
1723 | sch->qstats.drops++; | |
1724 | b->bytes += qdisc_pkt_len(ack); | |
1725 | len -= qdisc_pkt_len(ack); | |
1726 | q->buffer_used += skb->truesize - ack->truesize; | |
1727 | if (q->rate_flags & CAKE_FLAG_INGRESS) | |
1728 | cake_advance_shaper(q, b, ack, now, true); | |
1729 | ||
1730 | qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack)); | |
1731 | consume_skb(ack); | |
1732 | } else { | |
1733 | sch->q.qlen++; | |
1734 | q->buffer_used += skb->truesize; | |
1735 | } | |
046f6fd5 | 1736 | |
0c850344 THJ |
1737 | /* stats */ |
1738 | b->packets++; | |
1739 | b->bytes += len; | |
1740 | b->backlogs[idx] += len; | |
1741 | b->tin_backlog += len; | |
1742 | sch->qstats.backlog += len; | |
1743 | q->avg_window_bytes += len; | |
1744 | } | |
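/* Worked example for the GSO accounting above, with made-up numbers: if
 * one GSO skb with pkt_len len = 3000 is split into numsegs = 2 segments
 * totalling slen = 3040 (headers are replicated onto each segment), the
 * call becomes qdisc_tree_reduce_backlog(sch, 1 - 2, 3000 - 3040), i.e.
 * (-1, -40). Negative arguments *increase* the parents' qlen and backlog,
 * keeping ancestor qdiscs consistent with the extra segment and header
 * bytes now queued here.
 */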
046f6fd5 THJ |
1745 | |
1746 | if (q->overflow_timeout) | |
1747 | cake_heapify_up(q, b->overflow_idx[idx]); | |
1748 | ||
1749 | /* incoming bandwidth capacity estimate */ | |
7298de9c THJ |
1750 | if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) { |
1751 | u64 packet_interval = | |
1752 | ktime_to_ns(ktime_sub(now, q->last_packet_time)); | |
1753 | ||
1754 | if (packet_interval > NSEC_PER_SEC) | |
1755 | packet_interval = NSEC_PER_SEC; | |
1756 | ||
1757 | /* filter out short-term bursts, eg. wifi aggregation */ | |
1758 | q->avg_packet_interval = | |
1759 | cake_ewma(q->avg_packet_interval, | |
1760 | packet_interval, | |
1761 | (packet_interval > q->avg_packet_interval ? | |
1762 | 2 : 8)); | |
1763 | ||
1764 | q->last_packet_time = now; | |
1765 | ||
1766 | if (packet_interval > q->avg_packet_interval) { | |
1767 | u64 window_interval = | |
1768 | ktime_to_ns(ktime_sub(now, | |
1769 | q->avg_window_begin)); | |
1770 | u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC; | |
1771 | ||
1772 | do_div(b, window_interval); | |
1773 | q->avg_peak_bandwidth = | |
1774 | cake_ewma(q->avg_peak_bandwidth, b, | |
1775 | b > q->avg_peak_bandwidth ? 2 : 8); | |
1776 | q->avg_window_bytes = 0; | |
1777 | q->avg_window_begin = now; | |
1778 | ||
1779 | if (ktime_after(now, | |
1780 | ktime_add_ms(q->last_reconfig_time, | |
1781 | 250))) { | |
1782 | q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4; | |
1783 | cake_reconfigure(sch); | |
1784 | } | |
1785 | } | |
1786 | } else { | |
1787 | q->avg_window_bytes = 0; | |
1788 | q->last_packet_time = now; | |
1789 | } | |
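/* The averaging above relies on cake_ewma() (defined earlier in this
 * file); a sketch of its assumed shift-based form, where shift = 2
 * weights a new sample at 1/4 and shift = 8 at 1/256:
 */
#if 0 /* example only */
static u64 example_ewma(u64 avg, u64 sample, u32 shift)
{
	avg -= avg >> shift;	/* decay the old average by 2^-shift */
	avg += sample >> shift;	/* blend in the new sample at 2^-shift */
	return avg;
}
#endif
/* The asymmetric shifts therefore track increases quickly (1/4) and decay
 * slowly (1/256), which filters out short-term bursts such as wifi
 * aggregation while still following genuine rate changes.
 */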
046f6fd5 THJ |
1790 | |
1791 | /* flowchain */ | |
1792 | if (!flow->set || flow->set == CAKE_SET_DECAYING) { | |
1793 | struct cake_host *srchost = &b->hosts[flow->srchost]; | |
1794 | struct cake_host *dsthost = &b->hosts[flow->dsthost]; | |
1795 | u16 host_load = 1; | |
1796 | ||
1797 | if (!flow->set) { | |
1798 | list_add_tail(&flow->flowchain, &b->new_flows); | |
1799 | } else { | |
1800 | b->decaying_flow_count--; | |
1801 | list_move_tail(&flow->flowchain, &b->new_flows); | |
1802 | } | |
1803 | flow->set = CAKE_SET_SPARSE; | |
1804 | b->sparse_flow_count++; | |
1805 | ||
1806 | if (cake_dsrc(q->flow_mode)) | |
71263992 | 1807 | host_load = max(host_load, srchost->srchost_bulk_flow_count); |
046f6fd5 THJ |
1808 | |
1809 | if (cake_ddst(q->flow_mode)) | |
71263992 | 1810 | host_load = max(host_load, dsthost->dsthost_bulk_flow_count); |
046f6fd5 THJ |
1811 | |
1812 | flow->deficit = (b->flow_quantum * | |
1813 | quantum_div[host_load]) >> 16; | |
1814 | } else if (flow->set == CAKE_SET_SPARSE_WAIT) { | |
71263992 GA |
1815 | struct cake_host *srchost = &b->hosts[flow->srchost]; |
1816 | struct cake_host *dsthost = &b->hosts[flow->dsthost]; | |
1817 | ||
046f6fd5 THJ |
1818 | /* this flow was empty, accounted as a sparse flow, but actually |
1819 | * in the bulk rotation. | |
1820 | */ | |
1821 | flow->set = CAKE_SET_BULK; | |
1822 | b->sparse_flow_count--; | |
1823 | b->bulk_flow_count++; | |
71263992 GA |
1824 | |
1825 | if (cake_dsrc(q->flow_mode)) | |
1826 | srchost->srchost_bulk_flow_count++; | |
1827 | ||
1828 | if (cake_ddst(q->flow_mode)) | |
1829 | dsthost->dsthost_bulk_flow_count++; | |
1830 | ||
046f6fd5 THJ |
1831 | } |
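/* Worked example for the host-fairness quantum above: quantum_div[n] is
 * a reciprocal table (65535 / n, built in cake_init()), so
 * (quantum * quantum_div[n]) >> 16 approximates quantum / n without a
 * per-packet division. With flow_quantum = 1514 and host_load = 3:
 *
 *	(1514 * 21845) >> 16 = 504
 *
 * so each of that host's three bulk flows gets roughly a third of the
 * base quantum.
 */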
1832 | ||
1833 | if (q->buffer_used > q->buffer_max_used) | |
1834 | q->buffer_max_used = q->buffer_used; | |
1835 | ||
1836 | if (q->buffer_used > q->buffer_limit) { | |
1837 | u32 dropped = 0; | |
1838 | ||
1839 | while (q->buffer_used > q->buffer_limit) { | |
1840 | dropped++; | |
1841 | cake_drop(sch, to_free); | |
1842 | } | |
1843 | b->drop_overlimit += dropped; | |
1844 | } | |
1845 | return NET_XMIT_SUCCESS; | |
1846 | } | |
1847 | ||
1848 | static struct sk_buff *cake_dequeue_one(struct Qdisc *sch) | |
1849 | { | |
1850 | struct cake_sched_data *q = qdisc_priv(sch); | |
1851 | struct cake_tin_data *b = &q->tins[q->cur_tin]; | |
1852 | struct cake_flow *flow = &b->flows[q->cur_flow]; | |
1853 | struct sk_buff *skb = NULL; | |
1854 | u32 len; | |
1855 | ||
1856 | if (flow->head) { | |
1857 | skb = dequeue_head(flow); | |
1858 | len = qdisc_pkt_len(skb); | |
1859 | b->backlogs[q->cur_flow] -= len; | |
1860 | b->tin_backlog -= len; | |
1861 | sch->qstats.backlog -= len; | |
1862 | q->buffer_used -= skb->truesize; | |
1863 | sch->q.qlen--; | |
1864 | ||
1865 | if (q->overflow_timeout) | |
1866 | cake_heapify(q, b->overflow_idx[q->cur_flow]); | |
1867 | } | |
1868 | return skb; | |
1869 | } | |
1870 | ||
1871 | /* Discard leftover packets from a tin no longer in use. */ | |
1872 | static void cake_clear_tin(struct Qdisc *sch, u16 tin) | |
1873 | { | |
1874 | struct cake_sched_data *q = qdisc_priv(sch); | |
1875 | struct sk_buff *skb; | |
1876 | ||
1877 | q->cur_tin = tin; | |
1878 | for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++) | |
1879 | while (!!(skb = cake_dequeue_one(sch))) | |
1880 | kfree_skb(skb); | |
1881 | } | |
1882 | ||
1883 | static struct sk_buff *cake_dequeue(struct Qdisc *sch) | |
1884 | { | |
1885 | struct cake_sched_data *q = qdisc_priv(sch); | |
1886 | struct cake_tin_data *b = &q->tins[q->cur_tin]; | |
1887 | struct cake_host *srchost, *dsthost; | |
1888 | ktime_t now = ktime_get(); | |
1889 | struct cake_flow *flow; | |
1890 | struct list_head *head; | |
1891 | bool first_flow = true; | |
1892 | struct sk_buff *skb; | |
1893 | u16 host_load; | |
1894 | u64 delay; | |
1895 | u32 len; | |
1896 | ||
1897 | begin: | |
1898 | if (!sch->q.qlen) | |
1899 | return NULL; | |
1900 | ||
1901 | /* global hard shaper */ | |
1902 | if (ktime_after(q->time_next_packet, now) && | |
1903 | ktime_after(q->failsafe_next_packet, now)) { | |
1904 | u64 next = min(ktime_to_ns(q->time_next_packet), | |
1905 | ktime_to_ns(q->failsafe_next_packet)); | |
1906 | ||
1907 | sch->qstats.overlimits++; | |
1908 | qdisc_watchdog_schedule_ns(&q->watchdog, next); | |
1909 | return NULL; | |
1910 | } | |
1911 | ||
1912 | /* Choose a class to work on. */ | |
1913 | if (!q->rate_ns) { | |
1914 | /* In unlimited mode, can't rely on shaper timings, just balance | |
1915 | * with DRR | |
1916 | */ | |
1917 | bool wrapped = false, empty = true; | |
1918 | ||
1919 | while (b->tin_deficit < 0 || | |
1920 | !(b->sparse_flow_count + b->bulk_flow_count)) { | |
1921 | if (b->tin_deficit <= 0) | |
1922 | b->tin_deficit += b->tin_quantum_band; | |
1923 | if (b->sparse_flow_count + b->bulk_flow_count) | |
1924 | empty = false; | |
1925 | ||
1926 | q->cur_tin++; | |
1927 | b++; | |
1928 | if (q->cur_tin >= q->tin_cnt) { | |
1929 | q->cur_tin = 0; | |
1930 | b = q->tins; | |
1931 | ||
1932 | if (wrapped) { | |
1933 | /* It's possible for q->qlen to be | |
1934 | * nonzero when we actually have no | |
1935 | * packets anywhere. | |
1936 | */ | |
1937 | if (empty) | |
1938 | return NULL; | |
1939 | } else { | |
1940 | wrapped = true; | |
1941 | } | |
1942 | } | |
1943 | } | |
1944 | } else { | |
1945 | /* In shaped mode, choose: | |
1946 | * - Highest-priority tin with queue and meeting schedule, or | |
1947 | * - The earliest-scheduled tin with queue. | |
1948 | */ | |
1949 | ktime_t best_time = KTIME_MAX; | |
1950 | int tin, best_tin = 0; | |
1951 | ||
1952 | for (tin = 0; tin < q->tin_cnt; tin++) { | |
1953 | b = q->tins + tin; | |
1954 | if ((b->sparse_flow_count + b->bulk_flow_count) > 0) { | |
1955 | ktime_t time_to_pkt = | |
1956 | ktime_sub(b->time_next_packet, now); | |
1957 | ||
1958 | if (ktime_to_ns(time_to_pkt) <= 0 || | |
1959 | ktime_compare(time_to_pkt, | |
1960 | best_time) <= 0) { | |
1961 | best_time = time_to_pkt; | |
1962 | best_tin = tin; | |
1963 | } | |
1964 | } | |
1965 | } | |
1966 | ||
1967 | q->cur_tin = best_tin; | |
1968 | b = q->tins + best_tin; | |
1969 | ||
1970 | /* No point in going further if no packets to deliver. */ | |
1971 | if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count))) | |
1972 | return NULL; | |
1973 | } | |
1974 | ||
1975 | retry: | |
1976 | /* service this class */ | |
1977 | head = &b->decaying_flows; | |
1978 | if (!first_flow || list_empty(head)) { | |
1979 | head = &b->new_flows; | |
1980 | if (list_empty(head)) { | |
1981 | head = &b->old_flows; | |
1982 | if (unlikely(list_empty(head))) { | |
1983 | head = &b->decaying_flows; | |
1984 | if (unlikely(list_empty(head))) | |
1985 | goto begin; | |
1986 | } | |
1987 | } | |
1988 | } | |
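/* Service order implemented above: only on the first pass through this
 * tin do decaying flows get a look-in (so their AQM state can keep
 * decaying); otherwise sparse (new) flows strictly precede bulk (old)
 * flows, the DRR++ "new flows first" rule inherited from fq_codel.
 */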
1989 | flow = list_first_entry(head, struct cake_flow, flowchain); | |
1990 | q->cur_flow = flow - b->flows; | |
1991 | first_flow = false; | |
1992 | ||
1993 | /* triple isolation (modified DRR++) */ | |
1994 | srchost = &b->hosts[flow->srchost]; | |
1995 | dsthost = &b->hosts[flow->dsthost]; | |
1996 | host_load = 1; | |
1997 | ||
046f6fd5 THJ |
1998 | /* flow isolation (DRR++) */ |
1999 | if (flow->deficit <= 0) { | |
046f6fd5 THJ |
2000 | /* Keep all flows with deficits out of the sparse and decaying |
2001 | * rotations. No non-empty flow can go into the decaying | |
2002 | * rotation, so they can't get deficits | |
2003 | */ | |
2004 | if (flow->set == CAKE_SET_SPARSE) { | |
2005 | if (flow->head) { | |
2006 | b->sparse_flow_count--; | |
2007 | b->bulk_flow_count++; | |
71263992 GA |
2008 | |
2009 | if (cake_dsrc(q->flow_mode)) | |
2010 | srchost->srchost_bulk_flow_count++; | |
2011 | ||
2012 | if (cake_ddst(q->flow_mode)) | |
2013 | dsthost->dsthost_bulk_flow_count++; | |
2014 | ||
046f6fd5 THJ |
2015 | flow->set = CAKE_SET_BULK; |
2016 | } else { | |
2017 | /* we've moved it to the bulk rotation for | |
2018 | * correct deficit accounting but we still want | |
2019 | * to count it as a sparse flow, not a bulk one. | |
2020 | */ | |
2021 | flow->set = CAKE_SET_SPARSE_WAIT; | |
2022 | } | |
2023 | } | |
71263992 GA |
2024 | |
2025 | if (cake_dsrc(q->flow_mode)) | |
2026 | host_load = max(host_load, srchost->srchost_bulk_flow_count); | |
2027 | ||
2028 | if (cake_ddst(q->flow_mode)) | |
2029 | host_load = max(host_load, dsthost->dsthost_bulk_flow_count); | |
2030 | ||
2031 | WARN_ON(host_load > CAKE_QUEUES); | |
2032 | ||
2033 | /* The shifted prandom_u32() is a way to apply dithering to | |
2034 | * avoid accumulating roundoff errors | |
2035 | */ | |
2036 | flow->deficit += (b->flow_quantum * quantum_div[host_load] + | |
2037 | (prandom_u32() >> 16)) >> 16; | |
2038 | list_move_tail(&flow->flowchain, &b->old_flows); | |
2039 | ||
046f6fd5 THJ |
2040 | goto retry; |
2041 | } | |
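/* The dither above is stochastic rounding: adding a uniform value in
 * [0, 65535] before the >> 16 rounds up with probability equal to the
 * discarded fraction, so the *expected* replenishment is exactly
 * flow_quantum * quantum_div[host_load] / 65536 and truncation error
 * does not accumulate across rotations.
 */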
2042 | ||
2043 | /* Retrieve a packet via the AQM */ | |
2044 | while (1) { | |
2045 | skb = cake_dequeue_one(sch); | |
2046 | if (!skb) { | |
2047 | /* this queue was actually empty */ | |
2048 | if (cobalt_queue_empty(&flow->cvars, &b->cparams, now)) | |
2049 | b->unresponsive_flow_count--; | |
2050 | ||
2051 | if (flow->cvars.p_drop || flow->cvars.count || | |
2052 | ktime_before(now, flow->cvars.drop_next)) { | |
2053 | /* keep in the flowchain until the state has | |
2054 | * decayed to rest | |
2055 | */ | |
2056 | list_move_tail(&flow->flowchain, | |
2057 | &b->decaying_flows); | |
2058 | if (flow->set == CAKE_SET_BULK) { | |
2059 | b->bulk_flow_count--; | |
71263992 GA |
2060 | |
2061 | if (cake_dsrc(q->flow_mode)) | |
2062 | srchost->srchost_bulk_flow_count--; | |
2063 | ||
2064 | if (cake_ddst(q->flow_mode)) | |
2065 | dsthost->dsthost_bulk_flow_count--; | |
2066 | ||
046f6fd5 THJ |
2067 | b->decaying_flow_count++; |
2068 | } else if (flow->set == CAKE_SET_SPARSE || | |
2069 | flow->set == CAKE_SET_SPARSE_WAIT) { | |
2070 | b->sparse_flow_count--; | |
2071 | b->decaying_flow_count++; | |
2072 | } | |
2073 | flow->set = CAKE_SET_DECAYING; | |
2074 | } else { | |
2075 | /* remove empty queue from the flowchain */ | |
2076 | list_del_init(&flow->flowchain); | |
2077 | if (flow->set == CAKE_SET_SPARSE || | |
2078 | flow->set == CAKE_SET_SPARSE_WAIT) | |
2079 | b->sparse_flow_count--; | |
71263992 | 2080 | else if (flow->set == CAKE_SET_BULK) { |
046f6fd5 | 2081 | b->bulk_flow_count--; |
71263992 GA |
2082 | |
2083 | if (cake_dsrc(q->flow_mode)) | |
2084 | srchost->srchost_bulk_flow_count--; | |
2085 | ||
2086 | if (cake_ddst(q->flow_mode)) | |
2087 | dsthost->dsthost_bulk_flow_count--; | |
2088 | ||
2089 | } else | |
046f6fd5 THJ |
2090 | b->decaying_flow_count--; |
2091 | ||
2092 | flow->set = CAKE_SET_NONE; | |
046f6fd5 THJ |
2093 | } |
2094 | goto begin; | |
2095 | } | |
2096 | ||
2097 | /* Last packet in queue may be marked, shouldn't be dropped */ | |
7298de9c THJ |
2098 | if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb, |
2099 | (b->bulk_flow_count * | |
2100 | !!(q->rate_flags & | |
2101 | CAKE_FLAG_INGRESS))) || | |
046f6fd5 THJ |
2102 | !flow->head) |
2103 | break; | |
2104 | ||
7298de9c THJ |
2105 | /* drop this packet, get another one */ |
2106 | if (q->rate_flags & CAKE_FLAG_INGRESS) { | |
2107 | len = cake_advance_shaper(q, b, skb, | |
2108 | now, true); | |
2109 | flow->deficit -= len; | |
2110 | b->tin_deficit -= len; | |
2111 | } | |
046f6fd5 THJ |
2112 | flow->dropped++; |
2113 | b->tin_dropped++; | |
2114 | qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb)); | |
2115 | qdisc_qstats_drop(sch); | |
2116 | kfree_skb(skb); | |
7298de9c THJ |
2117 | if (q->rate_flags & CAKE_FLAG_INGRESS) |
2118 | goto retry; | |
046f6fd5 THJ |
2119 | } |
2120 | ||
2121 | b->tin_ecn_mark += !!flow->cvars.ecn_marked; | |
2122 | qdisc_bstats_update(sch, skb); | |
2123 | ||
2124 | /* collect delay stats */ | |
2125 | delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb))); | |
2126 | b->avge_delay = cake_ewma(b->avge_delay, delay, 8); | |
2127 | b->peak_delay = cake_ewma(b->peak_delay, delay, | |
2128 | delay > b->peak_delay ? 2 : 8); | |
2129 | b->base_delay = cake_ewma(b->base_delay, delay, | |
2130 | delay < b->base_delay ? 2 : 8); | |
2131 | ||
2132 | len = cake_advance_shaper(q, b, skb, now, false); | |
2133 | flow->deficit -= len; | |
2134 | b->tin_deficit -= len; | |
2135 | ||
2136 | if (ktime_after(q->time_next_packet, now) && sch->q.qlen) { | |
2137 | u64 next = min(ktime_to_ns(q->time_next_packet), | |
2138 | ktime_to_ns(q->failsafe_next_packet)); | |
2139 | ||
2140 | qdisc_watchdog_schedule_ns(&q->watchdog, next); | |
2141 | } else if (!sch->q.qlen) { | |
2142 | int i; | |
2143 | ||
2144 | for (i = 0; i < q->tin_cnt; i++) { | |
2145 | if (q->tins[i].decaying_flow_count) { | |
2146 | ktime_t next = | |
2147 | ktime_add_ns(now, | |
2148 | q->tins[i].cparams.target); | |
2149 | ||
2150 | qdisc_watchdog_schedule_ns(&q->watchdog, | |
2151 | ktime_to_ns(next)); | |
2152 | break; | |
2153 | } | |
2154 | } | |
2155 | } | |
2156 | ||
2157 | if (q->overflow_timeout) | |
2158 | q->overflow_timeout--; | |
2159 | ||
2160 | return skb; | |
2161 | } | |
2162 | ||
2163 | static void cake_reset(struct Qdisc *sch) | |
2164 | { | |
2165 | u32 c; | |
2166 | ||
2167 | for (c = 0; c < CAKE_MAX_TINS; c++) | |
2168 | cake_clear_tin(sch, c); | |
2169 | } | |
2170 | ||
2171 | static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = { | |
2172 | [TCA_CAKE_BASE_RATE64] = { .type = NLA_U64 }, | |
2173 | [TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 }, | |
2174 | [TCA_CAKE_ATM] = { .type = NLA_U32 }, | |
2175 | [TCA_CAKE_FLOW_MODE] = { .type = NLA_U32 }, | |
2176 | [TCA_CAKE_OVERHEAD] = { .type = NLA_S32 }, | |
2177 | [TCA_CAKE_RTT] = { .type = NLA_U32 }, | |
2178 | [TCA_CAKE_TARGET] = { .type = NLA_U32 }, | |
2179 | [TCA_CAKE_AUTORATE] = { .type = NLA_U32 }, | |
2180 | [TCA_CAKE_MEMORY] = { .type = NLA_U32 }, | |
2181 | [TCA_CAKE_NAT] = { .type = NLA_U32 }, | |
2182 | [TCA_CAKE_RAW] = { .type = NLA_U32 }, | |
2183 | [TCA_CAKE_WASH] = { .type = NLA_U32 }, | |
2184 | [TCA_CAKE_MPU] = { .type = NLA_U32 }, | |
2185 | [TCA_CAKE_INGRESS] = { .type = NLA_U32 }, | |
2186 | [TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 }, | |
0c850344 | [TCA_CAKE_SPLIT_GSO] = { .type = NLA_U32 }, | |
eab2fc82 | 2187 | [TCA_CAKE_FWMARK] = { .type = NLA_U32 }, |
046f6fd5 THJ |
2188 | }; |
2189 | ||
2190 | static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, | |
2191 | u64 target_ns, u64 rtt_est_ns) | |
2192 | { | |
2193 | /* Convert byte-rate into time-per-byte; the rate is floored at | |
2194 | * MIN_RATE so the shaper always unwedges in reasonable time. | |
2195 | */ | |
2196 | static const u64 MIN_RATE = 64; | |
2197 | u32 byte_target = mtu; | |
2198 | u64 byte_target_ns; | |
2199 | u8 rate_shft = 0; | |
2200 | u64 rate_ns = 0; | |
2201 | ||
2202 | b->flow_quantum = 1514; | |
2203 | if (rate) { | |
2204 | b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL); | |
2205 | rate_shft = 34; | |
2206 | rate_ns = ((u64)NSEC_PER_SEC) << rate_shft; | |
2207 | rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate)); | |
2208 | while (!!(rate_ns >> 34)) { | |
2209 | rate_ns >>= 1; | |
2210 | rate_shft--; | |
2211 | } | |
2212 | } /* else unlimited, ie. zero delay */ | |
2213 | ||
2214 | b->tin_rate_bps = rate; | |
2215 | b->tin_rate_ns = rate_ns; | |
2216 | b->tin_rate_shft = rate_shft; | |
2217 | ||
2218 | byte_target_ns = (byte_target * rate_ns) >> rate_shft; | |
2219 | ||
2220 | b->cparams.target = max((byte_target_ns * 3) / 2, target_ns); | |
2221 | b->cparams.interval = max(rtt_est_ns + | |
2222 | b->cparams.target - target_ns, | |
2223 | b->cparams.target * 2); | |
2224 | b->cparams.mtu_time = byte_target_ns; | |
2225 | b->cparams.p_inc = 1 << 24; /* 1/256 */ | |
2226 | b->cparams.p_dec = 1 << 20; /* 1/4096 */ | |
2227 | } | |
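/* Worked example for the fixed-point conversion above, assuming the rate
 * is carried in bytes per second (as tc conventionally encodes it): at
 * rate = 1250000 B/s (10 Mbit/s), rate_ns starts as
 * (NSEC_PER_SEC << 34) / 1250000 = 800 << 34; the normalisation loop then
 * shifts it down ten times, leaving rate_ns = 800 << 24 with
 * rate_shft = 24. Charging a packet of len bytes thus costs
 * (len * rate_ns) >> rate_shft = len * 800 ns, i.e. exactly 800 ns per
 * byte, with no runtime division.
 */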
2228 | ||
83f8fd69 | 2229 | static int cake_config_besteffort(struct Qdisc *sch) |
046f6fd5 THJ |
2230 | { |
2231 | struct cake_sched_data *q = qdisc_priv(sch); | |
2232 | struct cake_tin_data *b = &q->tins[0]; | |
83f8fd69 THJ |
2233 | u32 mtu = psched_mtu(qdisc_dev(sch)); |
2234 | u64 rate = q->rate_bps; | |
046f6fd5 THJ |
2235 | |
2236 | q->tin_cnt = 1; | |
83f8fd69 THJ |
2237 | |
2238 | q->tin_index = besteffort; | |
2239 | q->tin_order = normal_order; | |
2240 | ||
2241 | cake_set_rate(b, rate, mtu, | |
046f6fd5 THJ |
2242 | us_to_ns(q->target), us_to_ns(q->interval)); |
2243 | b->tin_quantum_band = 65535; | |
2244 | b->tin_quantum_prio = 65535; | |
2245 | ||
83f8fd69 THJ |
2246 | return 0; |
2247 | } | |
2248 | ||
2249 | static int cake_config_precedence(struct Qdisc *sch) | |
2250 | { | |
2251 | /* convert high-level (user visible) parameters into internal format */ | |
2252 | struct cake_sched_data *q = qdisc_priv(sch); | |
2253 | u32 mtu = psched_mtu(qdisc_dev(sch)); | |
2254 | u64 rate = q->rate_bps; | |
2255 | u32 quantum1 = 256; | |
2256 | u32 quantum2 = 256; | |
2257 | u32 i; | |
2258 | ||
2259 | q->tin_cnt = 8; | |
2260 | q->tin_index = precedence; | |
2261 | q->tin_order = normal_order; | |
2262 | ||
2263 | for (i = 0; i < q->tin_cnt; i++) { | |
2264 | struct cake_tin_data *b = &q->tins[i]; | |
2265 | ||
2266 | cake_set_rate(b, rate, mtu, us_to_ns(q->target), | |
2267 | us_to_ns(q->interval)); | |
2268 | ||
2269 | b->tin_quantum_prio = max_t(u16, 1U, quantum1); | |
2270 | b->tin_quantum_band = max_t(u16, 1U, quantum2); | |
2271 | ||
2272 | /* calculate next class's parameters */ | |
2273 | rate *= 7; | |
2274 | rate >>= 3; | |
2275 | ||
2276 | quantum1 *= 3; | |
2277 | quantum1 >>= 1; | |
2278 | ||
2279 | quantum2 *= 7; | |
2280 | quantum2 >>= 3; | |
2281 | } | |
2282 | ||
2283 | return 0; | |
2284 | } | |
2285 | ||
2286 | /* List of known Diffserv codepoints: | |
2287 | * | |
2288 | * Least Effort (CS1) | |
2289 | * Best Effort (CS0) | |
2290 | * Max Reliability & LLT "Lo" (TOS1) | |
2291 | * Max Throughput (TOS2) | |
2292 | * Min Delay (TOS4) | |
2293 | * LLT "La" (TOS5) | |
2294 | * Assured Forwarding 1 (AF1x) - x3 | |
2295 | * Assured Forwarding 2 (AF2x) - x3 | |
2296 | * Assured Forwarding 3 (AF3x) - x3 | |
2297 | * Assured Forwarding 4 (AF4x) - x3 | |
2298 | * Precedence Class 2 (CS2) | |
2299 | * Precedence Class 3 (CS3) | |
2300 | * Precedence Class 4 (CS4) | |
2301 | * Precedence Class 5 (CS5) | |
2302 | * Precedence Class 6 (CS6) | |
2303 | * Precedence Class 7 (CS7) | |
2304 | * Voice Admit (VA) | |
2305 | * Expedited Forwarding (EF) | |
2306 | ||
2307 | * Total 25 codepoints. | |
2308 | */ | |
2309 | ||
2310 | /* List of traffic classes in RFC 4594: | |
2311 | * (roughly descending order of contended priority) | |
2312 | * (roughly ascending order of uncontended throughput) | |
2313 | * | |
2314 | * Network Control (CS6,CS7) - routing traffic | |
2315 | * Telephony (EF,VA) - aka. VoIP streams | |
2316 | * Signalling (CS5) - VoIP setup | |
2317 | * Multimedia Conferencing (AF4x) - aka. video calls | |
2318 | * Realtime Interactive (CS4) - eg. games | |
2319 | * Multimedia Streaming (AF3x) - eg. YouTube, NetFlix, Twitch | |
2320 | * Broadcast Video (CS3) | |
2321 | * Low Latency Data (AF2x,TOS4) - eg. database | |
2322 | * Ops, Admin, Management (CS2,TOS1) - eg. ssh | |
2323 | * Standard Service (CS0 & unrecognised codepoints) | |
2324 | * High Throughput Data (AF1x,TOS2) - eg. web traffic | |
2325 | * Low Priority Data (CS1) - eg. BitTorrent | |
2326 | ||
2327 | * Total 12 traffic classes. | |
2328 | */ | |
2329 | ||
2330 | static int cake_config_diffserv8(struct Qdisc *sch) | |
2331 | { | |
2332 | /* Pruned list of traffic classes for typical applications: | |
2333 | * | |
2334 | * Network Control (CS6, CS7) | |
2335 | * Minimum Latency (EF, VA, CS5, CS4) | |
2336 | * Interactive Shell (CS2, TOS1) | |
2337 | * Low Latency Transactions (AF2x, TOS4) | |
2338 | * Video Streaming (AF4x, AF3x, CS3) | |
2339 | * Bog Standard (CS0 etc.) | |
2340 | * High Throughput (AF1x, TOS2) | |
2341 | * Background Traffic (CS1) | |
2342 | * | |
2343 | * Total 8 traffic classes. | |
2344 | */ | |
2345 | ||
2346 | struct cake_sched_data *q = qdisc_priv(sch); | |
2347 | u32 mtu = psched_mtu(qdisc_dev(sch)); | |
2348 | u64 rate = q->rate_bps; | |
2349 | u32 quantum1 = 256; | |
2350 | u32 quantum2 = 256; | |
2351 | u32 i; | |
2352 | ||
2353 | q->tin_cnt = 8; | |
2354 | ||
2355 | /* codepoint to class mapping */ | |
2356 | q->tin_index = diffserv8; | |
2357 | q->tin_order = normal_order; | |
2358 | ||
2359 | /* class characteristics */ | |
2360 | for (i = 0; i < q->tin_cnt; i++) { | |
2361 | struct cake_tin_data *b = &q->tins[i]; | |
2362 | ||
2363 | cake_set_rate(b, rate, mtu, us_to_ns(q->target), | |
2364 | us_to_ns(q->interval)); | |
2365 | ||
2366 | b->tin_quantum_prio = max_t(u16, 1U, quantum1); | |
2367 | b->tin_quantum_band = max_t(u16, 1U, quantum2); | |
2368 | ||
2369 | /* calculate next class's parameters */ | |
2370 | rate *= 7; | |
2371 | rate >>= 3; | |
2372 | ||
2373 | quantum1 *= 3; | |
2374 | quantum1 >>= 1; | |
2375 | ||
2376 | quantum2 *= 7; | |
2377 | quantum2 >>= 3; | |
2378 | } | |
2379 | ||
2380 | return 0; | |
2381 | } | |
2382 | ||
2383 | static int cake_config_diffserv4(struct Qdisc *sch) | |
2384 | { | |
2385 | /* Further pruned list of traffic classes for four-class system: | |
2386 | * | |
2387 | * Latency Sensitive (CS7, CS6, EF, VA, CS5, CS4) | |
2388 | * Streaming Media (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1) | |
2389 | * Best Effort (CS0, AF1x, TOS2, and those not specified) | |
2390 | * Background Traffic (CS1) | |
2391 | * | |
2392 | * Total 4 traffic classes. | |
2393 | */ | |
2394 | ||
2395 | struct cake_sched_data *q = qdisc_priv(sch); | |
2396 | u32 mtu = psched_mtu(qdisc_dev(sch)); | |
2397 | u64 rate = q->rate_bps; | |
2398 | u32 quantum = 1024; | |
2399 | ||
2400 | q->tin_cnt = 4; | |
2401 | ||
2402 | /* codepoint to class mapping */ | |
2403 | q->tin_index = diffserv4; | |
2404 | q->tin_order = bulk_order; | |
2405 | ||
2406 | /* class characteristics */ | |
2407 | cake_set_rate(&q->tins[0], rate, mtu, | |
2408 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2409 | cake_set_rate(&q->tins[1], rate >> 4, mtu, | |
2410 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2411 | cake_set_rate(&q->tins[2], rate >> 1, mtu, | |
2412 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2413 | cake_set_rate(&q->tins[3], rate >> 2, mtu, | |
2414 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2415 | ||
2416 | /* priority weights */ | |
2417 | q->tins[0].tin_quantum_prio = quantum; | |
2418 | q->tins[1].tin_quantum_prio = quantum >> 4; | |
2419 | q->tins[2].tin_quantum_prio = quantum << 2; | |
2420 | q->tins[3].tin_quantum_prio = quantum << 4; | |
2421 | ||
2422 | /* bandwidth-sharing weights */ | |
2423 | q->tins[0].tin_quantum_band = quantum; | |
2424 | q->tins[1].tin_quantum_band = quantum >> 4; | |
2425 | q->tins[2].tin_quantum_band = quantum >> 1; | |
2426 | q->tins[3].tin_quantum_band = quantum >> 2; | |
2427 | ||
2428 | return 0; | |
2429 | } | |
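/* Illustrative summary of the diffserv4 shape configured above (fractions
 * of the base rate; q = 1024; display order follows bulk_order, and the
 * tin names are those tc(8) prints for this mode):
 *
 *	tins[1]  Bulk         threshold rate/16   priority weight q/16
 *	tins[0]  Best Effort  threshold rate      priority weight q
 *	tins[2]  Video        threshold rate/2    priority weight 4*q
 *	tins[3]  Voice        threshold rate/4    priority weight 16*q
 */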
2430 | ||
2431 | static int cake_config_diffserv3(struct Qdisc *sch) | |
2432 | { | |
2433 | /* Simplified Diffserv structure with 3 tins. | |
2434 | * Low Priority (CS1) | |
2435 | * Best Effort | |
2436 | * Latency Sensitive (TOS4, VA, EF, CS6, CS7) | |
2437 | */ | |
2438 | struct cake_sched_data *q = qdisc_priv(sch); | |
2439 | u32 mtu = psched_mtu(qdisc_dev(sch)); | |
2440 | u64 rate = q->rate_bps; | |
2441 | u32 quantum = 1024; | |
2442 | ||
2443 | q->tin_cnt = 3; | |
2444 | ||
2445 | /* codepoint to class mapping */ | |
2446 | q->tin_index = diffserv3; | |
2447 | q->tin_order = bulk_order; | |
2448 | ||
2449 | /* class characteristics */ | |
2450 | cake_set_rate(&q->tins[0], rate, mtu, | |
2451 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2452 | cake_set_rate(&q->tins[1], rate >> 4, mtu, | |
2453 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2454 | cake_set_rate(&q->tins[2], rate >> 2, mtu, | |
2455 | us_to_ns(q->target), us_to_ns(q->interval)); | |
2456 | ||
2457 | /* priority weights */ | |
2458 | q->tins[0].tin_quantum_prio = quantum; | |
2459 | q->tins[1].tin_quantum_prio = quantum >> 4; | |
2460 | q->tins[2].tin_quantum_prio = quantum << 4; | |
2461 | ||
2462 | /* bandwidth-sharing weights */ | |
2463 | q->tins[0].tin_quantum_band = quantum; | |
2464 | q->tins[1].tin_quantum_band = quantum >> 4; | |
2465 | q->tins[2].tin_quantum_band = quantum >> 2; | |
2466 | ||
2467 | return 0; | |
2468 | } | |
2469 | ||
2470 | static void cake_reconfigure(struct Qdisc *sch) | |
2471 | { | |
2472 | struct cake_sched_data *q = qdisc_priv(sch); | |
2473 | int c, ft; | |
2474 | ||
2475 | switch (q->tin_mode) { | |
2476 | case CAKE_DIFFSERV_BESTEFFORT: | |
2477 | ft = cake_config_besteffort(sch); | |
2478 | break; | |
2479 | ||
2480 | case CAKE_DIFFSERV_PRECEDENCE: | |
2481 | ft = cake_config_precedence(sch); | |
2482 | break; | |
2483 | ||
2484 | case CAKE_DIFFSERV_DIFFSERV8: | |
2485 | ft = cake_config_diffserv8(sch); | |
2486 | break; | |
2487 | ||
2488 | case CAKE_DIFFSERV_DIFFSERV4: | |
2489 | ft = cake_config_diffserv4(sch); | |
2490 | break; | |
2491 | ||
2492 | case CAKE_DIFFSERV_DIFFSERV3: | |
2493 | default: | |
2494 | ft = cake_config_diffserv3(sch); | |
2495 | break; | |
2496 | } | |
2497 | ||
046f6fd5 THJ |
2498 | for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) { |
2499 | cake_clear_tin(sch, c); | |
2500 | q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time; | |
2501 | } | |
2502 | ||
2503 | q->rate_ns = q->tins[ft].tin_rate_ns; | |
2504 | q->rate_shft = q->tins[ft].tin_rate_shft; | |
2505 | ||
2506 | if (q->buffer_config_limit) { | |
2507 | q->buffer_limit = q->buffer_config_limit; | |
2508 | } else if (q->rate_bps) { | |
2509 | u64 t = q->rate_bps * q->interval; | |
2510 | ||
2511 | do_div(t, USEC_PER_SEC / 4); | |
2512 | q->buffer_limit = max_t(u32, t, 4U << 20); | |
2513 | } else { | |
2514 | q->buffer_limit = ~0; | |
2515 | } | |
2516 | ||
2517 | sch->flags &= ~TCQ_F_CAN_BYPASS; | |
2518 | ||
2519 | q->buffer_limit = min(q->buffer_limit, | |
2520 | max(sch->limit * psched_mtu(qdisc_dev(sch)), | |
2521 | q->buffer_config_limit)); | |
2522 | } | |
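/* Worked example for the default buffer sizing above: the limit is four
 * times the bandwidth-delay product, floored at 4 MB. With
 * rate_bps = 125000000 B/s (1 Gbit/s) and interval = 100000 us:
 *
 *	t = 125000000 * 100000 / (1000000 / 4) = 50000000
 *
 * i.e. a 50 MB buffer; at 10 Mbit/s the same formula yields only 500 kB,
 * so the 4 MB floor applies instead.
 */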
2523 | ||
2524 | static int cake_change(struct Qdisc *sch, struct nlattr *opt, | |
2525 | struct netlink_ext_ack *extack) | |
2526 | { | |
2527 | struct cake_sched_data *q = qdisc_priv(sch); | |
2528 | struct nlattr *tb[TCA_CAKE_MAX + 1]; | |
2529 | int err; | |
2530 | ||
2531 | if (!opt) | |
2532 | return -EINVAL; | |
2533 | ||
8cb08174 JB |
2534 | err = nla_parse_nested_deprecated(tb, TCA_CAKE_MAX, opt, cake_policy, |
2535 | extack); | |
046f6fd5 THJ |
2536 | if (err < 0) |
2537 | return err; | |
2538 | ||
ea825115 THJ |
2539 | if (tb[TCA_CAKE_NAT]) { |
2540 | #if IS_ENABLED(CONFIG_NF_CONNTRACK) | |
2541 | q->flow_mode &= ~CAKE_FLOW_NAT_FLAG; | |
2542 | q->flow_mode |= CAKE_FLOW_NAT_FLAG * | |
2543 | !!nla_get_u32(tb[TCA_CAKE_NAT]); | |
2544 | #else | |
2545 | NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT], | |
2546 | "No conntrack support in kernel"); | |
2547 | return -EOPNOTSUPP; | |
2548 | #endif | |
2549 | } | |
2550 | ||
046f6fd5 THJ |
2551 | if (tb[TCA_CAKE_BASE_RATE64]) |
2552 | q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]); | |
2553 | ||
83f8fd69 THJ |
2554 | if (tb[TCA_CAKE_DIFFSERV_MODE]) |
2555 | q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]); | |
2556 | ||
2557 | if (tb[TCA_CAKE_WASH]) { | |
2558 | if (!!nla_get_u32(tb[TCA_CAKE_WASH])) | |
2559 | q->rate_flags |= CAKE_FLAG_WASH; | |
2560 | else | |
2561 | q->rate_flags &= ~CAKE_FLAG_WASH; | |
2562 | } | |
2563 | ||
046f6fd5 | 2564 | if (tb[TCA_CAKE_FLOW_MODE]) |
ea825115 THJ |
2565 | q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) | |
2566 | (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) & | |
2567 | CAKE_FLOW_MASK)); | |
046f6fd5 | 2568 | |
a729b7f0 THJ |
2569 | if (tb[TCA_CAKE_ATM]) |
2570 | q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]); | |
2571 | ||
2572 | if (tb[TCA_CAKE_OVERHEAD]) { | |
2573 | q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]); | |
2574 | q->rate_flags |= CAKE_FLAG_OVERHEAD; | |
2575 | ||
2576 | q->max_netlen = 0; | |
2577 | q->max_adjlen = 0; | |
2578 | q->min_netlen = ~0; | |
2579 | q->min_adjlen = ~0; | |
2580 | } | |
2581 | ||
2582 | if (tb[TCA_CAKE_RAW]) { | |
2583 | q->rate_flags &= ~CAKE_FLAG_OVERHEAD; | |
2584 | ||
2585 | q->max_netlen = 0; | |
2586 | q->max_adjlen = 0; | |
2587 | q->min_netlen = ~0; | |
2588 | q->min_adjlen = ~0; | |
2589 | } | |
2590 | ||
2591 | if (tb[TCA_CAKE_MPU]) | |
2592 | q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]); | |
2593 | ||
046f6fd5 THJ |
2594 | if (tb[TCA_CAKE_RTT]) { |
2595 | q->interval = nla_get_u32(tb[TCA_CAKE_RTT]); | |
2596 | ||
2597 | if (!q->interval) | |
2598 | q->interval = 1; | |
2599 | } | |
2600 | ||
2601 | if (tb[TCA_CAKE_TARGET]) { | |
2602 | q->target = nla_get_u32(tb[TCA_CAKE_TARGET]); | |
2603 | ||
2604 | if (!q->target) | |
2605 | q->target = 1; | |
2606 | } | |
2607 | ||
7298de9c THJ |
2608 | if (tb[TCA_CAKE_AUTORATE]) { |
2609 | if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE])) | |
2610 | q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS; | |
2611 | else | |
2612 | q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS; | |
2613 | } | |
2614 | ||
2615 | if (tb[TCA_CAKE_INGRESS]) { | |
2616 | if (!!nla_get_u32(tb[TCA_CAKE_INGRESS])) | |
2617 | q->rate_flags |= CAKE_FLAG_INGRESS; | |
2618 | else | |
2619 | q->rate_flags &= ~CAKE_FLAG_INGRESS; | |
2620 | } | |
2621 | ||
8b713881 THJ |
2622 | if (tb[TCA_CAKE_ACK_FILTER]) |
2623 | q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]); | |
2624 | ||
046f6fd5 THJ |
2625 | if (tb[TCA_CAKE_MEMORY]) |
2626 | q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]); | |
2627 | ||
2db6dc26 DT |
2628 | if (tb[TCA_CAKE_SPLIT_GSO]) { |
2629 | if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO])) | |
2630 | q->rate_flags |= CAKE_FLAG_SPLIT_GSO; | |
2631 | else | |
2632 | q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO; | |
2633 | } | |
0c850344 | 2634 | |
0b5c7efd | 2635 | if (tb[TCA_CAKE_FWMARK]) { |
eab2fc82 THJ |
2636 | q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]); |
2637 | q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0; | |
0b5c7efd KDB |
2638 | } |
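/* Worked example for the fwmark handling above: with "fwmark 0x300" the
 * shift becomes __ffs(0x300) = 8, so a netfilter mark of 0x200 yields
 * (0x200 & 0x300) >> 8 = 2 in cake_select_tin(), selecting tin_order[1],
 * while a mark of zero under the mask leaves the DSCP mapping in effect.
 */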
2639 | ||
046f6fd5 THJ |
2640 | if (q->tins) { |
2641 | sch_tree_lock(sch); | |
2642 | cake_reconfigure(sch); | |
2643 | sch_tree_unlock(sch); | |
2644 | } | |
2645 | ||
2646 | return 0; | |
2647 | } | |
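/* Typical userspace invocations exercising the attributes parsed above
 * (illustrative, assuming a cake-aware iproute2):
 *
 *	tc qdisc replace dev eth0 root cake bandwidth 20Mbit diffserv4 nat
 *	tc qdisc change dev eth0 root cake ingress wash
 *
 * Each keyword maps onto one TCA_CAKE_* attribute handled in
 * cake_change() above.
 */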
2648 | ||
2649 | static void cake_destroy(struct Qdisc *sch) | |
2650 | { | |
2651 | struct cake_sched_data *q = qdisc_priv(sch); | |
2652 | ||
2653 | qdisc_watchdog_cancel(&q->watchdog); | |
2654 | tcf_block_put(q->block); | |
2655 | kvfree(q->tins); | |
2656 | } | |
2657 | ||
2658 | static int cake_init(struct Qdisc *sch, struct nlattr *opt, | |
2659 | struct netlink_ext_ack *extack) | |
2660 | { | |
2661 | struct cake_sched_data *q = qdisc_priv(sch); | |
2662 | int i, j, err; | |
2663 | ||
2664 | sch->limit = 10240; | |
83f8fd69 | 2665 | q->tin_mode = CAKE_DIFFSERV_DIFFSERV3; |
046f6fd5 THJ |
2666 | q->flow_mode = CAKE_FLOW_TRIPLE; |
2667 | ||
2668 | q->rate_bps = 0; /* unlimited by default */ | |
2669 | ||
2670 | q->interval = 100000; /* 100ms default */ | |
2671 | q->target = 5000; /* 5ms: codel RFC argues | |
2672 | * for 5 to 10% of interval | |
2673 | */ | |
2db6dc26 | 2674 | q->rate_flags |= CAKE_FLAG_SPLIT_GSO; |
046f6fd5 THJ |
2675 | q->cur_tin = 0; |
2676 | q->cur_flow = 0; | |
2677 | ||
2678 | qdisc_watchdog_init(&q->watchdog, sch); | |
2679 | ||
2680 | if (opt) { | |
2681 | int err = cake_change(sch, opt, extack); | |
2682 | ||
2683 | if (err) | |
2684 | return err; | |
2685 | } | |
2686 | ||
2687 | err = tcf_block_get(&q->block, &q->filter_list, sch, extack); | |
2688 | if (err) | |
2689 | return err; | |
2690 | ||
2691 | quantum_div[0] = ~0; | |
2692 | for (i = 1; i <= CAKE_QUEUES; i++) | |
2693 | quantum_div[i] = 65535 / i; | |
2694 | ||
329e0989 | 2695 | q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), |
046f6fd5 THJ |
2696 | GFP_KERNEL); |
2697 | if (!q->tins) | |
2698 | goto nomem; | |
2699 | ||
2700 | for (i = 0; i < CAKE_MAX_TINS; i++) { | |
2701 | struct cake_tin_data *b = q->tins + i; | |
2702 | ||
2703 | INIT_LIST_HEAD(&b->new_flows); | |
2704 | INIT_LIST_HEAD(&b->old_flows); | |
2705 | INIT_LIST_HEAD(&b->decaying_flows); | |
2706 | b->sparse_flow_count = 0; | |
2707 | b->bulk_flow_count = 0; | |
2708 | b->decaying_flow_count = 0; | |
2709 | ||
2710 | for (j = 0; j < CAKE_QUEUES; j++) { | |
2711 | struct cake_flow *flow = b->flows + j; | |
2712 | u32 k = j * CAKE_MAX_TINS + i; | |
2713 | ||
2714 | INIT_LIST_HEAD(&flow->flowchain); | |
2715 | cobalt_vars_init(&flow->cvars); | |
2716 | ||
2717 | q->overflow_heap[k].t = i; | |
2718 | q->overflow_heap[k].b = j; | |
2719 | b->overflow_idx[j] = k; | |
2720 | } | |
2721 | } | |
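/* The overflow heap uses a transposed index, k = j * CAKE_MAX_TINS + i,
 * so with CAKE_MAX_TINS = 8 (as defined earlier in this file) the entry
 * for tin i = 2, queue j = 5 lives at k = 5 * 8 + 2 = 42. overflow_idx[]
 * stores the inverse mapping so the enqueue/dequeue paths can reheapify
 * from a (tin, queue) pair in O(log n).
 */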
2722 | ||
2723 | cake_reconfigure(sch); | |
2724 | q->avg_peak_bandwidth = q->rate_bps; | |
2725 | q->min_netlen = ~0; | |
2726 | q->min_adjlen = ~0; | |
2727 | return 0; | |
2728 | ||
2729 | nomem: | |
2730 | cake_destroy(sch); | |
2731 | return -ENOMEM; | |
2732 | } | |
2733 | ||
2734 | static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) | |
2735 | { | |
2736 | struct cake_sched_data *q = qdisc_priv(sch); | |
2737 | struct nlattr *opts; | |
2738 | ||
ae0be8de | 2739 | opts = nla_nest_start_noflag(skb, TCA_OPTIONS); |
046f6fd5 THJ |
2740 | if (!opts) |
2741 | goto nla_put_failure; | |
2742 | ||
2743 | if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps, | |
2744 | TCA_CAKE_PAD)) | |
2745 | goto nla_put_failure; | |
2746 | ||
2747 | if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE, | |
2748 | q->flow_mode & CAKE_FLOW_MASK)) | |
2749 | goto nla_put_failure; | |
2750 | ||
2751 | if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval)) | |
2752 | goto nla_put_failure; | |
2753 | ||
2754 | if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target)) | |
2755 | goto nla_put_failure; | |
2756 | ||
2757 | if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit)) | |
2758 | goto nla_put_failure; | |
2759 | ||
7298de9c THJ |
2760 | if (nla_put_u32(skb, TCA_CAKE_AUTORATE, |
2761 | !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS))) | |
2762 | goto nla_put_failure; | |
2763 | ||
2764 | if (nla_put_u32(skb, TCA_CAKE_INGRESS, | |
2765 | !!(q->rate_flags & CAKE_FLAG_INGRESS))) | |
2766 | goto nla_put_failure; | |
2767 | ||
8b713881 THJ |
2768 | if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter)) |
2769 | goto nla_put_failure; | |
2770 | ||
ea825115 THJ |
2771 | if (nla_put_u32(skb, TCA_CAKE_NAT, |
2772 | !!(q->flow_mode & CAKE_FLOW_NAT_FLAG))) | |
2773 | goto nla_put_failure; | |
2774 | ||
83f8fd69 THJ |
2775 | if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode)) |
2776 | goto nla_put_failure; | |
2777 | ||
2778 | if (nla_put_u32(skb, TCA_CAKE_WASH, | |
2779 | !!(q->rate_flags & CAKE_FLAG_WASH))) | |
2780 | goto nla_put_failure; | |
a729b7f0 THJ |
2781 | |
2782 | if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead)) | |
2783 | goto nla_put_failure; | |
2784 | ||
2785 | if (!(q->rate_flags & CAKE_FLAG_OVERHEAD)) | |
2786 | if (nla_put_u32(skb, TCA_CAKE_RAW, 0)) | |
2787 | goto nla_put_failure; | |
2788 | ||
2789 | if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode)) | |
2790 | goto nla_put_failure; | |
2791 | ||
2792 | if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu)) | |
2793 | goto nla_put_failure; | |
0c850344 THJ |
2794 | |
2795 | if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO, | |
2796 | !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO))) | |
2797 | goto nla_put_failure; | |
0b5c7efd | 2798 | |
eab2fc82 | 2799 | if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask)) |
0b5c7efd | 2800 | goto nla_put_failure; |
83f8fd69 | 2801 | |
046f6fd5 THJ |
2802 | return nla_nest_end(skb, opts); |
2803 | ||
2804 | nla_put_failure: | |
2805 | return -1; | |
2806 | } | |
2807 | ||
2808 | static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | |
2809 | { | |
ae0be8de | 2810 | struct nlattr *stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); |
046f6fd5 THJ |
2811 | struct cake_sched_data *q = qdisc_priv(sch); |
2812 | struct nlattr *tstats, *ts; | |
2813 | int i; | |
2814 | ||
2815 | if (!stats) | |
2816 | return -1; | |
2817 | ||
2818 | #define PUT_STAT_U32(attr, data) do { \ | |
2819 | if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \ | |
2820 | goto nla_put_failure; \ | |
2821 | } while (0) | |
2822 | #define PUT_STAT_U64(attr, data) do { \ | |
2823 | if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \ | |
2824 | data, TCA_CAKE_STATS_PAD)) \ | |
2825 | goto nla_put_failure; \ | |
2826 | } while (0) | |
2827 | ||
2828 | PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth); | |
2829 | PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit); | |
2830 | PUT_STAT_U32(MEMORY_USED, q->buffer_max_used); | |
2831 | PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16)); | |
2832 | PUT_STAT_U32(MAX_NETLEN, q->max_netlen); | |
2833 | PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen); | |
2834 | PUT_STAT_U32(MIN_NETLEN, q->min_netlen); | |
2835 | PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen); | |
2836 | ||
2837 | #undef PUT_STAT_U32 | |
2838 | #undef PUT_STAT_U64 | |
2839 | ||
ae0be8de | 2840 | tstats = nla_nest_start_noflag(d->skb, TCA_CAKE_STATS_TIN_STATS); |
046f6fd5 THJ |
2841 | if (!tstats) |
2842 | goto nla_put_failure; | |
2843 | ||
2844 | #define PUT_TSTAT_U32(attr, data) do { \ | |
2845 | if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \ | |
2846 | goto nla_put_failure; \ | |
2847 | } while (0) | |
2848 | #define PUT_TSTAT_U64(attr, data) do { \ | |
2849 | if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \ | |
2850 | data, TCA_CAKE_TIN_STATS_PAD)) \ | |
2851 | goto nla_put_failure; \ | |
2852 | } while (0) | |
2853 | ||
2854 | for (i = 0; i < q->tin_cnt; i++) { | |
83f8fd69 | 2855 | struct cake_tin_data *b = &q->tins[q->tin_order[i]]; |
046f6fd5 | 2856 | |
ae0be8de | 2857 | ts = nla_nest_start_noflag(d->skb, i + 1); |
046f6fd5 THJ |
2858 | if (!ts) |
2859 | goto nla_put_failure; | |
2860 | ||
2861 | PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps); | |
2862 | PUT_TSTAT_U64(SENT_BYTES64, b->bytes); | |
2863 | PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog); | |
2864 | ||
2865 | PUT_TSTAT_U32(TARGET_US, | |
2866 | ktime_to_us(ns_to_ktime(b->cparams.target))); | |
2867 | PUT_TSTAT_U32(INTERVAL_US, | |
2868 | ktime_to_us(ns_to_ktime(b->cparams.interval))); | |
2869 | ||
2870 | PUT_TSTAT_U32(SENT_PACKETS, b->packets); | |
2871 | PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped); | |
2872 | PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark); | |
2873 | PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops); | |
2874 | ||
2875 | PUT_TSTAT_U32(PEAK_DELAY_US, | |
2876 | ktime_to_us(ns_to_ktime(b->peak_delay))); | |
2877 | PUT_TSTAT_U32(AVG_DELAY_US, | |
2878 | ktime_to_us(ns_to_ktime(b->avge_delay))); | |
2879 | PUT_TSTAT_U32(BASE_DELAY_US, | |
2880 | ktime_to_us(ns_to_ktime(b->base_delay))); | |
2881 | ||
2882 | PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits); | |
2883 | PUT_TSTAT_U32(WAY_MISSES, b->way_misses); | |
2884 | PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions); | |
2885 | ||
2886 | PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count + | |
2887 | b->decaying_flow_count); | |
2888 | PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count); | |
2889 | PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count); | |
2890 | PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen); | |
2891 | ||
2892 | PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum); | |
2893 | nla_nest_end(d->skb, ts); | |
2894 | } | |
2895 | ||
2896 | #undef PUT_TSTAT_U32 | |
2897 | #undef PUT_TSTAT_U64 | |
2898 | ||
2899 | nla_nest_end(d->skb, tstats); | |
2900 | return nla_nest_end(d->skb, stats); | |
2901 | ||
2902 | nla_put_failure: | |
2903 | nla_nest_cancel(d->skb, stats); | |
2904 | return -1; | |
2905 | } | |
2906 | ||
2907 | static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg) | |
2908 | { | |
2909 | return NULL; | |
2910 | } | |
2911 | ||
2912 | static unsigned long cake_find(struct Qdisc *sch, u32 classid) | |
2913 | { | |
2914 | return 0; | |
2915 | } | |
2916 | ||
2917 | static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent, | |
2918 | u32 classid) | |
2919 | { | |
2920 | return 0; | |
2921 | } | |
2922 | ||
2923 | static void cake_unbind(struct Qdisc *q, unsigned long cl) | |
2924 | { | |
2925 | } | |
2926 | ||
2927 | static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl, | |
2928 | struct netlink_ext_ack *extack) | |
2929 | { | |
2930 | struct cake_sched_data *q = qdisc_priv(sch); | |
2931 | ||
2932 | if (cl) | |
2933 | return NULL; | |
2934 | return q->block; | |
2935 | } | |
2936 | ||
2937 | static int cake_dump_class(struct Qdisc *sch, unsigned long cl, | |
2938 | struct sk_buff *skb, struct tcmsg *tcm) | |
2939 | { | |
2940 | tcm->tcm_handle |= TC_H_MIN(cl); | |
2941 | return 0; | |
2942 | } | |
2943 | ||
2944 | static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl, | |
2945 | struct gnet_dump *d) | |
2946 | { | |
2947 | struct cake_sched_data *q = qdisc_priv(sch); | |
2948 | const struct cake_flow *flow = NULL; | |
2949 | struct gnet_stats_queue qs = { 0 }; | |
2950 | struct nlattr *stats; | |
2951 | u32 idx = cl - 1; | |
2952 | ||
2953 | if (idx < CAKE_QUEUES * q->tin_cnt) { | |
83f8fd69 THJ |
2954 | const struct cake_tin_data *b = |
2955 | &q->tins[q->tin_order[idx / CAKE_QUEUES]]; | |
046f6fd5 THJ |
2956 | const struct sk_buff *skb; |
2957 | ||
2958 | flow = &b->flows[idx % CAKE_QUEUES]; | |
2959 | ||
2960 | if (flow->head) { | |
2961 | sch_tree_lock(sch); | |
2962 | skb = flow->head; | |
2963 | while (skb) { | |
2964 | qs.qlen++; | |
2965 | skb = skb->next; | |
2966 | } | |
2967 | sch_tree_unlock(sch); | |
2968 | } | |
2969 | qs.backlog = b->backlogs[idx % CAKE_QUEUES]; | |
2970 | qs.drops = flow->dropped; | |
2971 | } | |
2972 | if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) | |
2973 | return -1; | |
2974 | if (flow) { | |
2975 | ktime_t now = ktime_get(); | |
2976 | ||
ae0be8de | 2977 | stats = nla_nest_start_noflag(d->skb, TCA_STATS_APP); |
046f6fd5 THJ |
2978 | if (!stats) |
2979 | return -1; | |
2980 | ||
2981 | #define PUT_STAT_U32(attr, data) do { \ | |
2982 | if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \ | |
2983 | goto nla_put_failure; \ | |
2984 | } while (0) | |
2985 | #define PUT_STAT_S32(attr, data) do { \ | |
2986 | if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \ | |
2987 | goto nla_put_failure; \ | |
2988 | } while (0) | |
2989 | ||
2990 | PUT_STAT_S32(DEFICIT, flow->deficit); | |
2991 | PUT_STAT_U32(DROPPING, flow->cvars.dropping); | |
2992 | PUT_STAT_U32(COBALT_COUNT, flow->cvars.count); | |
2993 | PUT_STAT_U32(P_DROP, flow->cvars.p_drop); | |
2994 | if (flow->cvars.p_drop) { | |
2995 | PUT_STAT_S32(BLUE_TIMER_US, | |
2996 | ktime_to_us( | |
2997 | ktime_sub(now, | |
2998 | flow->cvars.blue_timer))); | |
2999 | } | |
3000 | if (flow->cvars.dropping) { | |
3001 | PUT_STAT_S32(DROP_NEXT_US, | |
3002 | ktime_to_us( | |
3003 | ktime_sub(now, | |
3004 | flow->cvars.drop_next))); | |
3005 | } | |
3006 | ||
3007 | if (nla_nest_end(d->skb, stats) < 0) | |
3008 | return -1; | |
3009 | } | |
3010 | ||
3011 | return 0; | |
3012 | ||
3013 | nla_put_failure: | |
3014 | nla_nest_cancel(d->skb, stats); | |
3015 | return -1; | |
3016 | } | |
3017 | ||
3018 | static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |
3019 | { | |
3020 | struct cake_sched_data *q = qdisc_priv(sch); | |
3021 | unsigned int i, j; | |
3022 | ||
3023 | if (arg->stop) | |
3024 | return; | |
3025 | ||
3026 | for (i = 0; i < q->tin_cnt; i++) { | |
83f8fd69 | 3027 | struct cake_tin_data *b = &q->tins[q->tin_order[i]]; |
046f6fd5 THJ |
3028 | |
3029 | for (j = 0; j < CAKE_QUEUES; j++) { | |
3030 | if (list_empty(&b->flows[j].flowchain) || | |
3031 | arg->count < arg->skip) { | |
3032 | arg->count++; | |
3033 | continue; | |
3034 | } | |
3035 | if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) { | |
3036 | arg->stop = 1; | |
3037 | break; | |
3038 | } | |
3039 | arg->count++; | |
3040 | } | |
3041 | } | |
3042 | } | |
3043 | ||
3044 | static const struct Qdisc_class_ops cake_class_ops = { | |
3045 | .leaf = cake_leaf, | |
3046 | .find = cake_find, | |
3047 | .tcf_block = cake_tcf_block, | |
3048 | .bind_tcf = cake_bind, | |
3049 | .unbind_tcf = cake_unbind, | |
3050 | .dump = cake_dump_class, | |
3051 | .dump_stats = cake_dump_class_stats, | |
3052 | .walk = cake_walk, | |
3053 | }; | |
3054 | ||
3055 | static struct Qdisc_ops cake_qdisc_ops __read_mostly = { | |
3056 | .cl_ops = &cake_class_ops, | |
3057 | .id = "cake", | |
3058 | .priv_size = sizeof(struct cake_sched_data), | |
3059 | .enqueue = cake_enqueue, | |
3060 | .dequeue = cake_dequeue, | |
3061 | .peek = qdisc_peek_dequeued, | |
3062 | .init = cake_init, | |
3063 | .reset = cake_reset, | |
3064 | .destroy = cake_destroy, | |
3065 | .change = cake_change, | |
3066 | .dump = cake_dump, | |
3067 | .dump_stats = cake_dump_stats, | |
3068 | .owner = THIS_MODULE, | |
3069 | }; | |
3070 | ||
3071 | static int __init cake_module_init(void) | |
3072 | { | |
3073 | return register_qdisc(&cake_qdisc_ops); | |
3074 | } | |
3075 | ||
3076 | static void __exit cake_module_exit(void) | |
3077 | { | |
3078 | unregister_qdisc(&cake_qdisc_ops); | |
3079 | } | |
3080 | ||
3081 | module_init(cake_module_init) | |
3082 | module_exit(cake_module_exit) | |
3083 | MODULE_AUTHOR("Jonathan Morton"); | |
3084 | MODULE_LICENSE("Dual BSD/GPL"); | |
3085 | MODULE_DESCRIPTION("The CAKE shaper."); |