// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_netem.c	Network emulator
 *
 * Many of the algorithms and ideas for this came from
 * NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>

#include <net/gso.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines. It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.

	 Correlated Loss Generator models

	 Added generation of correlated loss according to the
	 "Gilbert-Elliot" model, a 4-state Markov model.

	 References:
	 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	 and intuitive loss model for packet networks and its implementation
	 in the Netem module in the Linux kernel", available in [1]

	 Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		  Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
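/* Typical configuration is done from user space with tc(8) from iproute2;
 * a purely illustrative example (not part of this file's API):
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 0.3%
 *
 * adds 100ms of delay with +/-10ms jitter (25% correlated with the previous
 * value) and 0.3% random packet loss on eth0.
 */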

struct disttable {
	u32  size;
	s16 table[] __counted_by(size);
};

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
	struct rb_root t_root;

	/* a linear queue; reduces rbtree rebalancing when jitter is low */
	struct sk_buff	*t_head;
	struct sk_buff	*t_tail;

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	s64 latency;
	s64 jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u64 rate;
	s32 packet_overhead;
	u32 cell_size;
	struct reciprocal_value cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct prng {
		u64 seed;
		struct rnd_state prng_state;
	} prng;

	struct disttable *delay_dist;

	enum {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	enum {
		TX_IN_GAP_PERIOD = 1,
		TX_IN_BURST_PERIOD,
		LOST_IN_GAP_PERIOD,
		LOST_IN_BURST_PERIOD,
	} _4_state_model;

	enum {
		GOOD_STATE = 1,
		BAD_STATE,
	} GE_state_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

	struct tc_netem_slot slot_config;
	struct slotstate {
		u64 slot_next;
		s32 packets_left;
		s32 bytes_left;
	} slot;

	struct disttable *slot_dist;
};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 *
 * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
 * and skb->next & skb->prev are scratch space for a qdisc,
 * we save skb->tstamp value in skb->cb[] before destroying it.
 */
struct netem_skb_cb {
	u64	        time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = get_random_u32();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state, struct prng *p)
{
	u64 value, rho;
	unsigned long answer;
	struct rnd_state *s = &p->prng_state;

	if (!state || state->rho == 0)	/* no correlation */
		return prandom_u32_state(s);

	value = prandom_u32_state(s);
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
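/* get_crandom() above implements an exponentially correlated sequence in
 * 32.32 fixed point: with r ~= rho / 2^32, the result is roughly
 * U * (1 - r) + last * r for a fresh pseudo-random U, so rho == 0 yields
 * an uncorrelated stream and values near 2^32 - 1 track the previous output.
 */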

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = prandom_u32_state(&q->prng.prng_state);

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
	 *   LOST_IN_GAP_PERIOD => isolated losses within a gap period
	 *   LOST_IN_BURST_PERIOD => lost packets within a burst period
	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case TX_IN_GAP_PERIOD:
		if (rnd < clg->a4) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else if (clg->a1 + clg->a4 < rnd) {
			clg->state = TX_IN_GAP_PERIOD;
		}

		break;
	case TX_IN_BURST_PERIOD:
		if (rnd < clg->a5) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else {
			clg->state = TX_IN_BURST_PERIOD;
		}

		break;
	case LOST_IN_BURST_PERIOD:
		if (rnd < clg->a3)
			clg->state = TX_IN_BURST_PERIOD;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = TX_IN_GAP_PERIOD;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		}
		break;
	case LOST_IN_GAP_PERIOD:
		clg->state = TX_IN_GAP_PERIOD;
		break;
	}

	return false;
}

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	struct rnd_state *s = &q->prng.prng_state;

	switch (clg->state) {
	case GOOD_STATE:
		if (prandom_u32_state(s) < clg->a1)
			clg->state = BAD_STATE;
		if (prandom_u32_state(s) < clg->a4)
			return true;
		break;
	case BAD_STATE:
		if (prandom_u32_state(s) < clg->a2)
			clg->state = GOOD_STATE;
		if (prandom_u32_state(s) > clg->a3)
			return true;
	}

	return false;
}
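/* In terms of the netlink parameters copied in get_loss_clg() below:
 * a1 = p is the GOOD->BAD transition probability, a2 = r the BAD->GOOD one,
 * a4 = 1-k the loss probability while GOOD and a3 = h the "no loss"
 * probability while BAD, all scaled to the full u32 range.
 */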

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (also used for the GI model):
		 * extracts a value from the Markov 4-state loss generator;
		 * if it is 1, a packet is dropped.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm:
		 * extracts a value from the Gilbert-Elliot loss generator;
		 * if it is 1, a packet is dropped.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static s64 tabledist(s64 mu, s32 sigma,
		     struct crndstate *state,
		     struct prng *prng,
		     const struct disttable *dist)
{
	s64 x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state, prng);

	/* default uniform distribution */
	if (dist == NULL)
		return ((rnd % (2 * (u32)sigma)) + mu) - sigma;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
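/* tabledist() above effectively returns mu + t * sigma / NETEM_DIST_SCALE,
 * where t is a table sample of the target distribution scaled by
 * NETEM_DIST_SCALE; the division is split into (sigma / SCALE) * t plus a
 * rounded (sigma % SCALE) * t / SCALE term to limit the rounding error.
 */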

static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
{
	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	return div64_u64(len * NSEC_PER_SEC, q->rate);
}
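/* Worked example (assuming packet_overhead == 0 and q->rate in bytes/sec):
 * with rate = 125000 (1 Mbit/s), cell_size = 48 and cell_overhead = 5,
 * a 100 byte packet is padded to 3 cells = 3 * 53 = 159 bytes and its
 * transmission time is 159 * NSEC_PER_SEC / 125000 = 1272000 ns (~1.27 ms).
 */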

static void tfifo_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first(&q->t_root);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);
		rb_erase(&skb->rbnode, &q->t_root);
		rtnl_kfree_skbs(skb, skb);
	}

	rtnl_kfree_skbs(q->t_head, q->t_tail);
	q->t_head = NULL;
	q->t_tail = NULL;
}

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	u64 tnext = netem_skb_cb(nskb)->time_to_send;

	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
		if (q->t_tail)
			q->t_tail->next = nskb;
		else
			q->t_head = nskb;
		q->t_tail = nskb;
	} else {
		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

		while (*p) {
			struct sk_buff *skb;

			parent = *p;
			skb = rb_to_skb(parent);
			if (tnext >= netem_skb_cb(skb)->time_to_send)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&nskb->rbnode, parent, p);
		rb_insert_color(&nskb->rbnode, &q->t_root);
	}
	sch->q.qlen++;
}
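/* Packets whose send time is not earlier than the current tail are appended
 * to the plain t_head/t_tail list in O(1) (the common case when jitter is
 * small); out-of-order send times fall back to the rbtree keyed by
 * time_to_send.  netem_peek() below consults both structures.
 */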

/* netem can't properly corrupt a megapacket (like we get from GSO), so when
 * we statistically choose to corrupt one, we instead segment it, returning
 * the first packet to be corrupted, and re-enqueue the remaining frames
 */
static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
				     struct sk_buff **to_free)
{
	struct sk_buff *segs;
	netdev_features_t features = netif_skb_features(skb);

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs)) {
		qdisc_drop(skb, sch, to_free);
		return NULL;
	}
	consume_skb(skb);
	return segs;
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2 = NULL;
	struct sk_buff *segs = NULL;
	unsigned int prev_len = qdisc_pkt_len(skb);
	int count = 1;

	/* Do not fool qdisc_drop_all() */
	skb->prev = NULL;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			qdisc_qstats_drop(sch); /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	/* If a delay is expected, orphan the skb. (orphaning usually takes
	 * place at TX completion time, so _before_ the link transit delay)
	 */
	if (q->latency || q->jitter || q->rate)
		skb_orphan_partial(skb);

	/*
	 * If we need to duplicate the packet, then clone it before the
	 * original is modified.
	 */
	if (count > 1)
		skb2 = skb_clone(skb, GFP_ATOMIC);

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If the packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
		if (skb_is_gso(skb)) {
			skb = netem_segment(skb, sch, to_free);
			if (!skb)
				goto finish_segs;

			segs = skb->next;
			skb_mark_not_on_list(skb);
			qdisc_skb_cb(skb)->pkt_len = skb->len;
		}

		skb = skb_unshare(skb, GFP_ATOMIC);
		if (unlikely(!skb)) {
			qdisc_qstats_drop(sch);
			goto finish_segs;
		}
		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_help(skb)) {
			qdisc_drop(skb, sch, to_free);
			skb = NULL;
			goto finish_segs;
		}

		skb->data[get_random_u32_below(skb_headlen(skb))] ^=
			1<<get_random_u32_below(8);
	}

	if (unlikely(sch->q.qlen >= sch->limit)) {
		/* re-link segs, so that qdisc_drop_all() frees them all */
		skb->next = segs;
		qdisc_drop_all(skb, sch, to_free);
		if (skb2)
			__qdisc_drop(skb2, to_free);
		return NET_XMIT_DROP;
	}

	/*
	 * If doing duplication then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (skb2) {
		struct Qdisc *rootq = qdisc_root_bh(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

		q->duplicate = 0;
		rootq->enqueue(skb2, rootq, to_free);
		q->duplicate = dupsave;
		skb2 = NULL;
	}

	qdisc_qstats_backlog_inc(sch, skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
		u64 now;
		s64 delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, &q->prng, q->delay_dist);

		now = ktime_get_ns();

		if (q->rate) {
			struct netem_skb_cb *last = NULL;

			if (sch->q.tail)
				last = netem_skb_cb(sch->q.tail);
			if (q->t_root.rb_node) {
				struct sk_buff *t_skb;
				struct netem_skb_cb *t_last;

				t_skb = skb_rb_last(&q->t_root);
				t_last = netem_skb_cb(t_skb);
				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}
			if (q->t_tail) {
				struct netem_skb_cb *t_last =
					netem_skb_cb(q->t_tail);

				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}

			if (last) {
				/*
				 * Last packet in queue is reference point (now),
				 * calculate this time bonus and subtract
				 * from delay.
				 */
				delay -= last->time_to_send - now;
				delay = max_t(s64, 0, delay);
				now = last->time_to_send;
			}

			delay += packet_time_ns(qdisc_pkt_len(skb), q);
		}

		cb->time_to_send = now + delay;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = ktime_get_ns();
		q->counter = 0;

		__qdisc_enqueue_head(skb, &sch->q);
		sch->qstats.requeues++;
	}

finish_segs:
	if (skb2)
		__qdisc_drop(skb2, to_free);

	if (segs) {
		unsigned int len, last_len;
		int rc, nb;

		len = skb ? skb->len : 0;
		nb = skb ? 1 : 0;

		while (segs) {
			skb2 = segs->next;
			skb_mark_not_on_list(segs);
			qdisc_skb_cb(segs)->pkt_len = segs->len;
			last_len = segs->len;
			rc = qdisc_enqueue(segs, sch, to_free);
			if (rc != NET_XMIT_SUCCESS) {
				if (net_xmit_drop_count(rc))
					qdisc_qstats_drop(sch);
			} else {
				nb++;
				len += last_len;
			}
			segs = skb2;
		}
		/* Parent qdiscs accounted for 1 skb of size @prev_len */
		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
	} else if (!skb) {
		return NET_XMIT_DROP;
	}
	return NET_XMIT_SUCCESS;
}

/* Set up the next slot: a new future opening time and a fresh budget of
 * packets and bytes.
 */

static void get_slot_next(struct netem_sched_data *q, u64 now)
{
	s64 next_delay;

	if (!q->slot_dist)
		next_delay = q->slot_config.min_delay +
				(get_random_u32() *
				 (q->slot_config.max_delay -
				  q->slot_config.min_delay) >> 32);
	else
		next_delay = tabledist(q->slot_config.dist_delay,
				       (s32)(q->slot_config.dist_jitter),
				       NULL, &q->prng, q->slot_dist);

	q->slot.slot_next = now + next_delay;
	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
}
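/* Slots emulate links that deliver in bursts (e.g. Wi-Fi style aggregation;
 * that is the motivating use case, not something enforced here): packets are
 * only released while a slot is open, and once max_packets or max_bytes is
 * exhausted the next slot opens after a delay drawn either uniformly from
 * [min_delay, max_delay] or from the slot distribution table.
 */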

static struct sk_buff *netem_peek(struct netem_sched_data *q)
{
	struct sk_buff *skb = skb_rb_first(&q->t_root);
	u64 t1, t2;

	if (!skb)
		return q->t_head;
	if (!q->t_head)
		return skb;

	t1 = netem_skb_cb(skb)->time_to_send;
	t2 = netem_skb_cb(q->t_head)->time_to_send;
	if (t1 < t2)
		return skb;
	return q->t_head;
}

static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
{
	if (skb == q->t_head) {
		q->t_head = skb->next;
		if (!q->t_head)
			q->t_tail = NULL;
	} else {
		rb_erase(&skb->rbnode, &q->t_root);
	}
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

tfifo_dequeue:
	skb = __qdisc_dequeue_head(&sch->q);
	if (skb) {
		qdisc_qstats_backlog_dec(sch, skb);
deliver:
		qdisc_bstats_update(sch, skb);
		return skb;
	}
	skb = netem_peek(q);
	if (skb) {
		u64 time_to_send;
		u64 now = ktime_get_ns();

		/* if more time remaining? */
		time_to_send = netem_skb_cb(skb)->time_to_send;
		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
			get_slot_next(q, now);

		if (time_to_send <= now && q->slot.slot_next <= now) {
			netem_erase_head(q, skb);
			sch->q.qlen--;
			qdisc_qstats_backlog_dec(sch, skb);
			skb->next = NULL;
			skb->prev = NULL;
			/* skb->dev shares skb->rbnode area,
			 * we need to restore its value.
			 */
			skb->dev = qdisc_dev(sch);

			if (q->slot.slot_next) {
				q->slot.packets_left--;
				q->slot.bytes_left -= qdisc_pkt_len(skb);
				if (q->slot.packets_left <= 0 ||
				    q->slot.bytes_left <= 0)
					get_slot_next(q, now);
			}

			if (q->qdisc) {
				unsigned int pkt_len = qdisc_pkt_len(skb);
				struct sk_buff *to_free = NULL;
				int err;

				err = qdisc_enqueue(skb, q->qdisc, &to_free);
				kfree_skb_list(to_free);
				if (err != NET_XMIT_SUCCESS) {
					if (net_xmit_drop_count(err))
						qdisc_qstats_drop(sch);
					qdisc_tree_reduce_backlog(sch, 1, pkt_len);
				}
				goto tfifo_dequeue;
			}
			goto deliver;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   max(time_to_send,
					       q->slot.slot_next));
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	tfifo_reset(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	kvfree(d);
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */

static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
{
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	struct disttable *d;
	int i;

	if (!n || n > NETEM_DIST_MAX)
		return -EINVAL;

	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	*tbl = d;
	return 0;
}
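/* The table contents are opaque to the kernel: user space supplies signed
 * 16-bit samples (iproute2 ships pre-built normal, pareto and paretonormal
 * tables, typically) which tabledist() above uses as a scaled shape of the
 * desired distribution.
 */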

static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_slot *c = nla_data(attr);

	q->slot_config = *c;
	if (q->slot_config.max_packets == 0)
		q->slot_config.max_packets = INT_MAX;
	if (q->slot_config.max_bytes == 0)
		q->slot_config.max_bytes = INT_MAX;

	/* capping dist_jitter to the range acceptable by tabledist() */
	q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));

	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
	if (q->slot_config.min_delay | q->slot_config.max_delay |
	    q->slot_config.dist_jitter)
		q->slot.slot_next = ktime_get_ns();
	else
		q->slot.slot_next = 0;
}

static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	q->cell_overhead = r->cell_overhead;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	else
		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}

static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = TX_IN_GAP_PERIOD;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = GOOD_STATE;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
	[TCA_NETEM_PRNG_SEED]	= { .type = NLA_U64 },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse_deprecated(tb, maxtype,
					    nla_data(nla) + NLA_ALIGN(len),
					    nested_len, policy, NULL);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct disttable *delay_dist = NULL;
	struct disttable *slot_dist = NULL;
	struct tc_netem_qopt *qopt;
	struct clgstate old_clg;
	int old_loss_model = CLG_RANDOM;
	int ret;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			goto table_free;
	}

	if (tb[TCA_NETEM_SLOT_DIST]) {
		ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
		if (ret)
			goto table_free;
	}

	sch_tree_lock(sch);
	/* backup q->clg and q->loss_model */
	old_clg = q->clg;
	old_loss_model = q->loss_model;

	if (tb[TCA_NETEM_LOSS]) {
		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
		if (ret) {
			q->loss_model = old_loss_model;
			q->clg = old_clg;
			goto unlock;
		}
	} else {
		q->loss_model = CLG_RANDOM;
	}

	if (delay_dist)
		swap(q->delay_dist, delay_dist);
	if (slot_dist)
		swap(q->slot_dist, slot_dist);
	sch->limit = qopt->limit;

	q->latency = PSCHED_TICKS2NS(qopt->latency);
	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(q, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_REORDER])
		get_reorder(q, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(q, tb[TCA_NETEM_RATE]);

	if (tb[TCA_NETEM_RATE64])
		q->rate = max_t(u64, q->rate,
				nla_get_u64(tb[TCA_NETEM_RATE64]));

	if (tb[TCA_NETEM_LATENCY64])
		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);

	if (tb[TCA_NETEM_JITTER64])
		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);

	if (tb[TCA_NETEM_ECN])
		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

	if (tb[TCA_NETEM_SLOT])
		get_slot(q, tb[TCA_NETEM_SLOT]);

	/* capping jitter to the range acceptable by tabledist() */
	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);

	if (tb[TCA_NETEM_PRNG_SEED])
		q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
	else
		q->prng.seed = get_random_u64();
	prandom_seed_state(&q->prng.prng_state, q->prng.seed);

unlock:
	sch_tree_unlock(sch);

table_free:
	dist_free(delay_dist);
	dist_free(slot_dist);
	return ret;
}

static int netem_init(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	qdisc_watchdog_init(&q->watchdog, sch);

	if (!opt)
		return -EINVAL;

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt, extack);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_put(q->qdisc);
	dist_free(q->delay_dist);
	dist_free(q->slot_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
			goto nla_put_failure;
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
			goto nla_put_failure;
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;
	struct tc_netem_slot slot;

	qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
			     UINT_MAX);
	qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
			    UINT_MAX);
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

	if (q->rate >= (1ULL << 32)) {
		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
				      TCA_NETEM_PAD))
			goto nla_put_failure;
		rate.rate = ~0U;
	} else {
		rate.rate = q->rate;
	}
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	if (q->slot_config.min_delay | q->slot_config.max_delay |
	    q->slot_config.dist_jitter) {
		slot = q->slot_config;
		if (slot.max_packets == INT_MAX)
			slot.max_packets = 0;
		if (slot.max_bytes == INT_MAX)
			slot.max_bytes = 0;
		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
			goto nla_put_failure;
	}

	if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
			      TCA_NETEM_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (!tc_qdisc_stats_dump(sch, 1, walker))
			return;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.find		=	netem_find,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};
MODULE_ALIAS_NET_SCH("netem");


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Network characteristics emulator qdisc");