// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/prandom.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>

#include <net/gso.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"

/*	Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	----------------------------------------------------------------

	This started out as a simple way to delay outgoing packets to
	test TCP but has grown to include most of the functionality
	of a full blown network emulator like NISTnet. It can delay
	packets and add random jitter (and correlation). The random
	distribution can be loaded from a table as well to provide
	normal, Pareto, or experimental curves. Packet loss,
	duplication, and reordering can also be emulated.

	This qdisc does not do classification; that can be handled by
	layering other disciplines. It does not need to do bandwidth
	control either, since that can be handled by using token
	bucket or other rate control.

	Correlated Loss Generator models

	Added generation of correlated loss according to the
	"Gilbert-Elliot" model, a 4-state markov model.

	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/

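/* Example (illustrative only, nothing below depends on it): typical
 * iproute2 invocations exercising the features described above; see
 * tc-netem(8) for the authoritative syntax.
 *
 *   # 100ms delay, 10ms jitter, 25% correlation between successive delays
 *   tc qdisc add dev eth0 root netem delay 100ms 10ms 25%
 *
 *   # replace the configuration with random loss, duplication and corruption
 *   tc qdisc change dev eth0 root netem loss 0.3% duplicate 1% corrupt 0.1%
 */
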
struct disttable {
	u32  size;
	s16 table[] __counted_by(size);
};

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
	struct rb_root t_root;

	/* a linear queue; reduces rbtree rebalancing when jitter is low */
	struct sk_buff	*t_head;
	struct sk_buff	*t_tail;

	u32 t_len;

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	s64 latency;
	s64 jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u64 rate;
	s32 packet_overhead;
	u32 cell_size;
	struct reciprocal_value cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct prng {
		u64 seed;
		struct rnd_state prng_state;
	} prng;

	struct disttable *delay_dist;

	enum {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	enum {
		TX_IN_GAP_PERIOD = 1,
		TX_IN_BURST_PERIOD,
		LOST_IN_GAP_PERIOD,
		LOST_IN_BURST_PERIOD,
	} _4_state_model;

	enum {
		GOOD_STATE = 1,
		BAD_STATE,
	} GE_state_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5;	/* p23 used only in 4-states */
	} clg;

	struct tc_netem_slot slot_config;
	struct slotstate {
		u64 slot_next;
		s32 packets_left;
		s32 bytes_left;
	} slot;

	struct disttable *slot_dist;
};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 *
 * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
 * and skb->next & skb->prev are scratch space for a qdisc,
 * we save skb->tstamp value in skb->cb[] before destroying it.
 */
struct netem_skb_cb {
	u64	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = get_random_u32();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state, struct prng *p)
{
	u64 value, rho;
	unsigned long answer;
	struct rnd_state *s = &p->prng_state;

	if (!state || state->rho == 0)	/* no correlation */
		return prandom_u32_state(s);

	value = prandom_u32_state(s);
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}

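/* Sketch of the recurrence implemented by get_crandom() above (assuming
 * rho is the correlation scaled so that 100% ~= 2^32):
 *
 *   answer ~= value * (1 - rho/2^32) + last * (rho/2^32)
 *
 * i.e. each output is a weighted mix of a fresh uniform value and the
 * previous output, so with e.g. rho ~= 0x80000000 (50%) consecutive
 * values drift rather than jump independently.
 */
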
/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = prandom_u32_state(&q->prng.prng_state);

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
	 *   LOST_IN_GAP_PERIOD => isolated losses within a gap period
	 *   LOST_IN_BURST_PERIOD => lost packets within a burst period
	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case TX_IN_GAP_PERIOD:
		if (rnd < clg->a4) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else if (clg->a1 + clg->a4 < rnd) {
			clg->state = TX_IN_GAP_PERIOD;
		}

		break;
	case TX_IN_BURST_PERIOD:
		if (rnd < clg->a5) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else {
			clg->state = TX_IN_BURST_PERIOD;
		}

		break;
	case LOST_IN_BURST_PERIOD:
		if (rnd < clg->a3)
			clg->state = TX_IN_BURST_PERIOD;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = TX_IN_GAP_PERIOD;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		}
		break;
	case LOST_IN_GAP_PERIOD:
		clg->state = TX_IN_GAP_PERIOD;
		break;
	}

	return false;
}

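/* Example (illustrative): the a1..a5 parameters of the 4-state model are
 * normally supplied via the tc-netem(8) "loss state" option, roughly
 *
 *   tc qdisc add dev eth0 root netem loss state 1% 30% 10% 5% 0.1%
 *
 * meaning p13 = 1%, p31 = 30%, p32 = 10%, p23 = 5%, p14 = 0.1%; a small
 * p13/p14 combined with a comparatively large p31 gives short loss bursts
 * separated by long gap periods.
 */
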
/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between a random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	struct rnd_state *s = &q->prng.prng_state;

	switch (clg->state) {
	case GOOD_STATE:
		if (prandom_u32_state(s) < clg->a1)
			clg->state = BAD_STATE;
		if (prandom_u32_state(s) < clg->a4)
			return true;
		break;
	case BAD_STATE:
		if (prandom_u32_state(s) < clg->a2)
			clg->state = GOOD_STATE;
		if (prandom_u32_state(s) > clg->a3)
			return true;
	}

	return false;
}

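/* Sketch of the Gilbert-Elliot parameters used above (a1..a4 == p, r, h, 1-k):
 * sojourn times are geometric, so the chain spends on average 1/p packets in
 * GOOD_STATE and 1/r packets in BAD_STATE, dropping packets with probability
 * 1-k while good and 1-h while bad.  With h == 0 and k == 1 this degenerates
 * to the classic (Simple) Gilbert model.
 */
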
static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor, &q->prng);

	case CLG_4_STATES:
		/* 4state loss model algorithm (used also for GI model)
		 * Extracts a value from the markov 4 state loss generator,
		 * if it is 1 drops a packet and if needed writes the event in
		 * the kernel logs
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm
		 * Extracts a value from the Gilbert-Elliot loss generator,
		 * if it is 1 drops a packet and if needed writes the event in
		 * the kernel logs
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static s64 tabledist(s64 mu, s32 sigma,
		     struct crndstate *state,
		     struct prng *prng,
		     const struct disttable *dist)
{
	s64 x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state, prng);

	/* default uniform distribution */
	if (dist == NULL)
		return ((rnd % (2 * (u32)sigma)) + mu) - sigma;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

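/* Worked example for tabledist() (assuming NETEM_DIST_SCALE == 8192, see
 * include/uapi/linux/pkt_sched.h): each table entry t is an inverse-CDF
 * sample scaled by NETEM_DIST_SCALE, so the returned value is roughly
 *
 *   mu + sigma * t / NETEM_DIST_SCALE
 *
 * e.g. mu = 100ms, sigma = 10ms and t = -8192 yields ~90ms, while t = 16384
 * yields ~120ms.  Without a table the result is uniform in
 * [mu - sigma, mu + sigma).
 */
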
static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
{
	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	return div64_u64(len * NSEC_PER_SEC, q->rate);
}

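/* Worked example for packet_time_ns(): q->rate is in bytes per second, so a
 * 1500 byte packet at rate == 125000 (1 Mbit/s) occupies the link for
 * 1500 * NSEC_PER_SEC / 125000 = 12,000,000 ns = 12 ms.  With an ATM-like
 * cell_size == 48 and cell_overhead == 5, the same packet is first rounded
 * up to 32 cells and billed as 32 * 53 = 1696 bytes.
 */
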
static void tfifo_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first(&q->t_root);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);
		rb_erase(&skb->rbnode, &q->t_root);
		rtnl_kfree_skbs(skb, skb);
	}

	rtnl_kfree_skbs(q->t_head, q->t_tail);
	q->t_head = NULL;
	q->t_tail = NULL;
	q->t_len = 0;
}

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	u64 tnext = netem_skb_cb(nskb)->time_to_send;

	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
		if (q->t_tail)
			q->t_tail->next = nskb;
		else
			q->t_head = nskb;
		q->t_tail = nskb;
	} else {
		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

		while (*p) {
			struct sk_buff *skb;

			parent = *p;
			skb = rb_to_skb(parent);
			if (tnext >= netem_skb_cb(skb)->time_to_send)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&nskb->rbnode, parent, p);
		rb_insert_color(&nskb->rbnode, &q->t_root);
	}
	q->t_len++;
	sch->q.qlen++;
}

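/* Note on the two queues above: an skb whose time_to_send is not earlier than
 * the current tail's goes on the plain t_head/t_tail list in O(1) (the common
 * case when jitter is zero), while out-of-order skbs fall back to the rbtree,
 * keeping rebalancing work proportional to the amount of reordering.
 */
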
/* netem can't properly corrupt a megapacket (like we get from GSO), so when
 * we statistically choose to corrupt one, we instead segment it, returning
 * the first packet to be corrupted, and re-enqueue the remaining frames
 */
static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
				     struct sk_buff **to_free)
{
	struct sk_buff *segs;
	netdev_features_t features = netif_skb_features(skb);

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs)) {
		qdisc_drop(skb, sch, to_free);
		return NULL;
	}
	consume_skb(skb);
	return segs;
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2 = NULL;
	struct sk_buff *segs = NULL;
	unsigned int prev_len = qdisc_pkt_len(skb);
	int count = 1;

	/* Do not fool qdisc_drop_all() */
	skb->prev = NULL;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor, &q->prng))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			qdisc_qstats_drop(sch); /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	/* If a delay is expected, orphan the skb. (orphaning usually takes
	 * place at TX completion time, so _before_ the link transit delay)
	 */
	if (q->latency || q->jitter || q->rate)
		skb_orphan_partial(skb);

	/*
	 * If we need to duplicate packet, then clone it before
	 * original is modified.
	 */
	if (count > 1)
		skb2 = skb_clone(skb, GFP_ATOMIC);

	/*
	 * Randomized packet corruption.
	 * Make copy if needed since we are modifying
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor, &q->prng)) {
		if (skb_is_gso(skb)) {
			skb = netem_segment(skb, sch, to_free);
			if (!skb)
				goto finish_segs;

			segs = skb->next;
			skb_mark_not_on_list(skb);
			qdisc_skb_cb(skb)->pkt_len = skb->len;
		}

		skb = skb_unshare(skb, GFP_ATOMIC);
		if (unlikely(!skb)) {
			qdisc_qstats_drop(sch);
			goto finish_segs;
		}
		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_help(skb)) {
			qdisc_drop(skb, sch, to_free);
			skb = NULL;
			goto finish_segs;
		}

		skb->data[get_random_u32_below(skb_headlen(skb))] ^=
			1<<get_random_u32_below(8);
	}

	if (unlikely(q->t_len >= sch->limit)) {
		/* re-link segs, so that qdisc_drop_all() frees them all */
		skb->next = segs;
		qdisc_drop_all(skb, sch, to_free);
		if (skb2)
			__qdisc_drop(skb2, to_free);
		return NET_XMIT_DROP;
	}

	/*
	 * If doing duplication then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (skb2) {
		struct Qdisc *rootq = qdisc_root_bh(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

		q->duplicate = 0;
		rootq->enqueue(skb2, rootq, to_free);
		q->duplicate = dupsave;
		skb2 = NULL;
	}

	qdisc_qstats_backlog_inc(sch, skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor, &q->prng)) {
		u64 now;
		s64 delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, &q->prng, q->delay_dist);

		now = ktime_get_ns();

		if (q->rate) {
			struct netem_skb_cb *last = NULL;

			if (sch->q.tail)
				last = netem_skb_cb(sch->q.tail);
			if (q->t_root.rb_node) {
				struct sk_buff *t_skb;
				struct netem_skb_cb *t_last;

				t_skb = skb_rb_last(&q->t_root);
				t_last = netem_skb_cb(t_skb);
				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}
			if (q->t_tail) {
				struct netem_skb_cb *t_last =
					netem_skb_cb(q->t_tail);

				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}

			if (last) {
				/*
				 * Last packet in queue is reference point (now),
				 * calculate this time bonus and subtract
				 * from delay.
				 */
				delay -= last->time_to_send - now;
				delay = max_t(s64, 0, delay);
				now = last->time_to_send;
			}

			delay += packet_time_ns(qdisc_pkt_len(skb), q);
		}

		cb->time_to_send = now + delay;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = ktime_get_ns();
		q->counter = 0;

		__qdisc_enqueue_head(skb, &sch->q);
		sch->qstats.requeues++;
	}

finish_segs:
	if (skb2)
		__qdisc_drop(skb2, to_free);

	if (segs) {
		unsigned int len, last_len;
		int rc, nb;

		len = skb ? skb->len : 0;
		nb = skb ? 1 : 0;

		while (segs) {
			skb2 = segs->next;
			skb_mark_not_on_list(segs);
			qdisc_skb_cb(segs)->pkt_len = segs->len;
			last_len = segs->len;
			rc = qdisc_enqueue(segs, sch, to_free);
			if (rc != NET_XMIT_SUCCESS) {
				if (net_xmit_drop_count(rc))
					qdisc_qstats_drop(sch);
			} else {
				nb++;
				len += last_len;
			}
			segs = skb2;
		}
		/* Parent qdiscs accounted for 1 skb of size @prev_len */
		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
	} else if (!skb) {
		return NET_XMIT_DROP;
	}
	return NET_XMIT_SUCCESS;
}

/* Delay the next round with a new future slot with a
 * correct number of bytes and packets.
 */

static void get_slot_next(struct netem_sched_data *q, u64 now)
{
	s64 next_delay;

	if (!q->slot_dist)
		next_delay = q->slot_config.min_delay +
				(get_random_u32() *
				 (q->slot_config.max_delay -
				  q->slot_config.min_delay) >> 32);
	else
		next_delay = tabledist(q->slot_config.dist_delay,
				       (s32)(q->slot_config.dist_jitter),
				       NULL, &q->prng, q->slot_dist);

	q->slot.slot_next = now + next_delay;
	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
}

static struct sk_buff *netem_peek(struct netem_sched_data *q)
{
	struct sk_buff *skb = skb_rb_first(&q->t_root);
	u64 t1, t2;

	if (!skb)
		return q->t_head;
	if (!q->t_head)
		return skb;

	t1 = netem_skb_cb(skb)->time_to_send;
	t2 = netem_skb_cb(q->t_head)->time_to_send;
	if (t1 < t2)
		return skb;
	return q->t_head;
}

static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
{
	if (skb == q->t_head) {
		q->t_head = skb->next;
		if (!q->t_head)
			q->t_tail = NULL;
	} else {
		rb_erase(&skb->rbnode, &q->t_root);
	}
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

tfifo_dequeue:
	skb = __qdisc_dequeue_head(&sch->q);
	if (skb) {
deliver:
		qdisc_qstats_backlog_dec(sch, skb);
		qdisc_bstats_update(sch, skb);
		return skb;
	}
	skb = netem_peek(q);
	if (skb) {
		u64 time_to_send;
		u64 now = ktime_get_ns();

		/* if more time remaining? */
		time_to_send = netem_skb_cb(skb)->time_to_send;
		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
			get_slot_next(q, now);

		if (time_to_send <= now && q->slot.slot_next <= now) {
			netem_erase_head(q, skb);
			q->t_len--;
			skb->next = NULL;
			skb->prev = NULL;
			/* skb->dev shares skb->rbnode area,
			 * we need to restore its value.
			 */
			skb->dev = qdisc_dev(sch);

			if (q->slot.slot_next) {
				q->slot.packets_left--;
				q->slot.bytes_left -= qdisc_pkt_len(skb);
				if (q->slot.packets_left <= 0 ||
				    q->slot.bytes_left <= 0)
					get_slot_next(q, now);
			}

			if (q->qdisc) {
				unsigned int pkt_len = qdisc_pkt_len(skb);
				struct sk_buff *to_free = NULL;
				int err;

				err = qdisc_enqueue(skb, q->qdisc, &to_free);
				kfree_skb_list(to_free);
				if (err != NET_XMIT_SUCCESS) {
					if (net_xmit_drop_count(err))
						qdisc_qstats_drop(sch);
					sch->qstats.backlog -= pkt_len;
					sch->q.qlen--;
					qdisc_tree_reduce_backlog(sch, 1, pkt_len);
				}
				goto tfifo_dequeue;
			}
			sch->q.qlen--;
			goto deliver;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb) {
				sch->q.qlen--;
				goto deliver;
			}
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   max(time_to_send,
					       q->slot.slot_next));
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb) {
			sch->q.qlen--;
			goto deliver;
		}
	}
	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	tfifo_reset(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	kvfree(d);
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */

static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
{
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	struct disttable *d;
	int i;

	if (!n || n > NETEM_DIST_MAX)
		return -EINVAL;

	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	*tbl = d;
	return 0;
}

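/* Note (illustrative): the tables delivered in TCA_NETEM_DELAY_DIST and
 * TCA_NETEM_SLOT_DIST are normally generated in userspace; iproute2 ships
 * precomputed "normal", "pareto" and "paretonormal" tables (plus a
 * maketable helper for custom ones), selected with e.g.
 *
 *   tc qdisc add dev eth0 root netem delay 100ms 20ms distribution normal
 */
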
static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_slot *c = nla_data(attr);

	q->slot_config = *c;
	if (q->slot_config.max_packets == 0)
		q->slot_config.max_packets = INT_MAX;
	if (q->slot_config.max_bytes == 0)
		q->slot_config.max_bytes = INT_MAX;

	/* capping dist_jitter to the range acceptable by tabledist() */
	q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));

	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
	if (q->slot_config.min_delay | q->slot_config.max_delay |
	    q->slot_config.dist_jitter)
		q->slot.slot_next = ktime_get_ns();
	else
		q->slot.slot_next = 0;
}

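/* Example (illustrative): slots emulate bursty link schedulers such as
 * Wi-Fi aggregation or DOCSIS grants, e.g.
 *
 *   tc qdisc add dev eth0 root netem slot 800us 1ms packets 32 bytes 64k
 *
 * releases queued packets in bursts of at most 32 packets / 64 KB, with a
 * uniformly distributed 0.8ms-1ms pause between bursts (see tc-netem(8)).
 */
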
static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	q->cell_overhead = r->cell_overhead;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	else
		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}

static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = TX_IN_GAP_PERIOD;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = GOOD_STATE;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
	[TCA_NETEM_PRNG_SEED]	= { .type = NLA_U64 },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse_deprecated(tb, maxtype,
					    nla_data(nla) + NLA_ALIGN(len),
					    nested_len, policy, NULL);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct disttable *delay_dist = NULL;
	struct disttable *slot_dist = NULL;
	struct tc_netem_qopt *qopt;
	struct clgstate old_clg;
	int old_loss_model = CLG_RANDOM;
	int ret;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			goto table_free;
	}

	if (tb[TCA_NETEM_SLOT_DIST]) {
		ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
		if (ret)
			goto table_free;
	}

	sch_tree_lock(sch);
	/* backup q->clg and q->loss_model */
	old_clg = q->clg;
	old_loss_model = q->loss_model;

	if (tb[TCA_NETEM_LOSS]) {
		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
		if (ret) {
			q->loss_model = old_loss_model;
			q->clg = old_clg;
			goto unlock;
		}
	} else {
		q->loss_model = CLG_RANDOM;
	}

	if (delay_dist)
		swap(q->delay_dist, delay_dist);
	if (slot_dist)
		swap(q->slot_dist, slot_dist);
	sch->limit = qopt->limit;

	q->latency = PSCHED_TICKS2NS(qopt->latency);
	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(q, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_REORDER])
		get_reorder(q, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(q, tb[TCA_NETEM_RATE]);

	if (tb[TCA_NETEM_RATE64])
		q->rate = max_t(u64, q->rate,
				nla_get_u64(tb[TCA_NETEM_RATE64]));

	if (tb[TCA_NETEM_LATENCY64])
		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);

	if (tb[TCA_NETEM_JITTER64])
		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);

	if (tb[TCA_NETEM_ECN])
		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

	if (tb[TCA_NETEM_SLOT])
		get_slot(q, tb[TCA_NETEM_SLOT]);

	/* capping jitter to the range acceptable by tabledist() */
	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);

	if (tb[TCA_NETEM_PRNG_SEED])
		q->prng.seed = nla_get_u64(tb[TCA_NETEM_PRNG_SEED]);
	else
		q->prng.seed = get_random_u64();
	prandom_seed_state(&q->prng.prng_state, q->prng.seed);

unlock:
	sch_tree_unlock(sch);

table_free:
	dist_free(delay_dist);
	dist_free(slot_dist);
	return ret;
}

AA
1088static int netem_init(struct Qdisc *sch, struct nlattr *opt,
1089 struct netlink_ext_ack *extack)
1da177e4
LT
1090{
1091 struct netem_sched_data *q = qdisc_priv(sch);
1092 int ret;
1093
634576a1
NA
1094 qdisc_watchdog_init(&q->watchdog, sch);
1095
1da177e4
LT
1096 if (!opt)
1097 return -EINVAL;
1098
661b7972 1099 q->loss_model = CLG_RANDOM;
2030721c 1100 ret = netem_change(sch, opt, extack);
50612537 1101 if (ret)
250a65f7 1102 pr_info("netem: change failed\n");
1da177e4
LT
1103 return ret;
1104}
1105
1106static void netem_destroy(struct Qdisc *sch)
1107{
1108 struct netem_sched_data *q = qdisc_priv(sch);
1109
59cb5c67 1110 qdisc_watchdog_cancel(&q->watchdog);
50612537 1111 if (q->qdisc)
86bd446b 1112 qdisc_put(q->qdisc);
6373a9a2 1113 dist_free(q->delay_dist);
0a9fe5c3 1114 dist_free(q->slot_dist);
1da177e4
LT
1115}
1116
661b7972 1117static int dump_loss_model(const struct netem_sched_data *q,
1118 struct sk_buff *skb)
1119{
1120 struct nlattr *nest;
1121
ae0be8de 1122 nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
661b7972 1123 if (nest == NULL)
1124 goto nla_put_failure;
1125
1126 switch (q->loss_model) {
1127 case CLG_RANDOM:
1128 /* legacy loss model */
1129 nla_nest_cancel(skb, nest);
1130 return 0; /* no data */
1131
1132 case CLG_4_STATES: {
1133 struct tc_netem_gimodel gi = {
1134 .p13 = q->clg.a1,
1135 .p31 = q->clg.a2,
1136 .p32 = q->clg.a3,
1137 .p14 = q->clg.a4,
1138 .p23 = q->clg.a5,
1139 };
1140
1b34ec43
DM
1141 if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
1142 goto nla_put_failure;
661b7972 1143 break;
1144 }
1145 case CLG_GILB_ELL: {
1146 struct tc_netem_gemodel ge = {
1147 .p = q->clg.a1,
1148 .r = q->clg.a2,
1149 .h = q->clg.a3,
1150 .k1 = q->clg.a4,
1151 };
1152
1b34ec43
DM
1153 if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
1154 goto nla_put_failure;
661b7972 1155 break;
1156 }
1157 }
1158
1159 nla_nest_end(skb, nest);
1160 return 0;
1161
1162nla_put_failure:
1163 nla_nest_cancel(skb, nest);
1164 return -1;
1165}
1166
static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;
	struct tc_netem_slot slot;

	qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
			     UINT_MAX);
	qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
			    UINT_MAX);
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

	if (q->rate >= (1ULL << 32)) {
		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
				      TCA_NETEM_PAD))
			goto nla_put_failure;
		rate.rate = ~0U;
	} else {
		rate.rate = q->rate;
	}
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	if (q->slot_config.min_delay | q->slot_config.max_delay |
	    q->slot_config.dist_jitter) {
		slot = q->slot_config;
		if (slot.max_packets == INT_MAX)
			slot.max_packets = 0;
		if (slot.max_bytes == INT_MAX)
			slot.max_bytes = 0;
		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
			goto nla_put_failure;
	}

	if (nla_put_u64_64bit(skb, TCA_NETEM_PRNG_SEED, q->prng.seed,
			      TCA_NETEM_PAD))
		goto nla_put_failure;

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

1254 struct sk_buff *skb, struct tcmsg *tcm)
1255{
1256 struct netem_sched_data *q = qdisc_priv(sch);
1257
50612537 1258 if (cl != 1 || !q->qdisc) /* only one class */
10f6dfcf 1259 return -ENOENT;
1260
1261 tcm->tcm_handle |= TC_H_MIN(1);
1262 tcm->tcm_info = q->qdisc->handle;
1263
1264 return 0;
1265}
1266
1267static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
653d6fd6 1268 struct Qdisc **old, struct netlink_ext_ack *extack)
10f6dfcf 1269{
1270 struct netem_sched_data *q = qdisc_priv(sch);
1271
86a7996c 1272 *old = qdisc_replace(sch, new, &q->qdisc);
10f6dfcf 1273 return 0;
1274}
1275
1276static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
1277{
1278 struct netem_sched_data *q = qdisc_priv(sch);
1279 return q->qdisc;
1280}
1281
143976ce 1282static unsigned long netem_find(struct Qdisc *sch, u32 classid)
10f6dfcf 1283{
1284 return 1;
1285}
1286
10f6dfcf 1287static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
1288{
1289 if (!walker->stop) {
e046fa89
ZS
1290 if (!tc_qdisc_stats_dump(sch, 1, walker))
1291 return;
10f6dfcf 1292 }
1293}
1294
1295static const struct Qdisc_class_ops netem_class_ops = {
1296 .graft = netem_graft,
1297 .leaf = netem_leaf,
143976ce 1298 .find = netem_find,
10f6dfcf 1299 .walk = netem_walk,
1300 .dump = netem_dump_class,
1301};
1302
20fea08b 1303static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
1da177e4 1304 .id = "netem",
10f6dfcf 1305 .cl_ops = &netem_class_ops,
1da177e4
LT
1306 .priv_size = sizeof(struct netem_sched_data),
1307 .enqueue = netem_enqueue,
1308 .dequeue = netem_dequeue,
77be155c 1309 .peek = qdisc_peek_dequeued,
1da177e4
LT
1310 .init = netem_init,
1311 .reset = netem_reset,
1312 .destroy = netem_destroy,
1313 .change = netem_change,
1314 .dump = netem_dump,
1315 .owner = THIS_MODULE,
1316};
241a94ab 1317MODULE_ALIAS_NET_SCH("netem");
1da177e4
LT
1318
1319
1320static int __init netem_module_init(void)
1321{
eb229c4c 1322 pr_info("netem: version " VERSION "\n");
1da177e4
LT
1323 return register_qdisc(&netem_qdisc_ops);
1324}
1325static void __exit netem_module_exit(void)
1326{
1327 unregister_qdisc(&netem_qdisc_ops);
1328}
1329module_init(netem_module_init)
1330module_exit(netem_module_exit)
1331MODULE_LICENSE("GPL");
f96118c5 1332MODULE_DESCRIPTION("Network characteristics emulator qdisc");