/*
 * net/sched/sch_netem.c	Network emulator
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License.
 *
 *		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"

/* Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool"
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification; that can be handled by
	 layering other disciplines. It does not need to do bandwidth
	 control either, since that can be handled by using token
	 bucket or other rate control.

     Correlated Loss Generator models

     Added generation of correlated loss according to the
     "Gilbert-Elliot" model, a 4-state markov model.

     References:
     [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
     [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
     and intuitive loss model for packet networks and its implementation
     in the Netem module in the Linux kernel", available in [1]

     Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
	      Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
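
/* Example (added for illustration; typical iproute2 usage, not part of the
 * original file):
 *
 *	tc qdisc add dev eth0 root netem delay 100ms 10ms 25% loss 1%
 *
 * delays every packet by 100ms +/- 10ms with 25% correlation between
 * successive jitter samples, and drops 1% of packets at random.
 */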

struct disttable {
	u32  size;
	s16 table[0];
};

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
	struct rb_root t_root;

	/* a linear queue; reduces rbtree rebalancing when jitter is low */
	struct sk_buff	*t_head;
	struct sk_buff	*t_tail;

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	s64 latency;
	s64 jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u64 rate;
	s32 packet_overhead;
	u32 cell_size;
	struct reciprocal_value cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable *delay_dist;

	enum {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	enum {
		TX_IN_GAP_PERIOD = 1,
		TX_IN_BURST_PERIOD,
		LOST_IN_GAP_PERIOD,
		LOST_IN_BURST_PERIOD,
	} _4_state_model;

	enum {
		GOOD_STATE = 1,
		BAD_STATE,
	} GE_state_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

	struct tc_netem_slot slot_config;
	struct slotstate {
		u64 slot_next;
		s32 packets_left;
		s32 bytes_left;
	} slot;

	struct disttable *slot_dist;
};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 *
 * As skb->rbnode uses the same storage as skb->next, skb->prev and skb->tstamp,
 * and skb->next & skb->prev are scratch space for a qdisc,
 * we save skb->tstamp value in skb->cb[] before destroying it.
 */
struct netem_skb_cb {
	u64	time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = prandom_u32();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (!state || state->rho == 0)	/* no correlation */
		return prandom_u32();

	value = prandom_u32();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}

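/* Worked example (added for clarity; treats rho as a fraction of 2^32):
 * get_crandom() computes, in 32-bit fixed point,
 *
 *	answer = value * (1 - rho) + last * rho
 *
 * so with rho = 0x80000000 (~50% correlation) each output is the average of
 * a fresh prandom_u32() draw and the previous output, making successive
 * values drift rather than jump.
 */
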
/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = prandom_u32();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state and if the next packet has to be transmitted or lost.
	 * The four states correspond to:
	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
	 *   LOST_IN_BURST_PERIOD => isolated losses within a gap period
	 *   LOST_IN_GAP_PERIOD => lost packets within a burst period
	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case TX_IN_GAP_PERIOD:
		if (rnd < clg->a4) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else if (clg->a1 + clg->a4 < rnd) {
			clg->state = TX_IN_GAP_PERIOD;
		}

		break;
	case TX_IN_BURST_PERIOD:
		if (rnd < clg->a5) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else {
			clg->state = TX_IN_BURST_PERIOD;
		}

		break;
	case LOST_IN_GAP_PERIOD:
		if (rnd < clg->a3)
			clg->state = TX_IN_BURST_PERIOD;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = TX_IN_GAP_PERIOD;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		}
		break;
	case LOST_IN_BURST_PERIOD:
		clg->state = TX_IN_GAP_PERIOD;
		break;
	}

	return false;
}

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case GOOD_STATE:
		if (prandom_u32() < clg->a1)
			clg->state = BAD_STATE;
		if (prandom_u32() < clg->a4)
			return true;
		break;
	case BAD_STATE:
		if (prandom_u32() < clg->a2)
			clg->state = GOOD_STATE;
		if (prandom_u32() > clg->a3)
			return true;
	}

	return false;
}

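/* Parameter mapping for the Gilbert-Elliot generator above (spelled out for
 * clarity; derived from the code, not from the original comments): a1 = p
 * (good->bad transition probability), a2 = r (bad->good), a4 = 1-k (loss
 * probability in the good state) and a3 = h, so a packet in the bad state is
 * lost with probability 1-h. All values are scaled to the full u32 range and
 * compared against prandom_u32().
 */
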
static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4state loss model algorithm (used also for GI model)
		* Extracts a value from the markov 4 state loss generator,
		* if it is 1 drops a packet and if needed writes the event in
		* the kernel logs
		*/
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm
		* Extracts a value from the Gilbert-Elliot loss generator,
		* if it is 1 drops a packet and if needed writes the event in
		* the kernel logs
		*/
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}


/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static s64 tabledist(s64 mu, s32 sigma,
		     struct crndstate *state,
		     const struct disttable *dist)
{
	s64 x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return ((rnd % (2 * sigma)) + mu) - sigma;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}

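/* Note on the arithmetic above (added for clarity; a reconstruction from the
 * code, not from the original comments): the split into sigma/NETEM_DIST_SCALE
 * and sigma % NETEM_DIST_SCALE, plus the +/- NETEM_DIST_SCALE/2 rounding, is
 * an overflow-safe, rounded evaluation of
 *
 *	result = mu + (sigma * t) / NETEM_DIST_SCALE
 *
 * where t is the signed sample drawn from the loaded distribution table, i.e.
 * the table expresses the distribution shape in units of sigma/NETEM_DIST_SCALE.
 */
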
static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
{
	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size) /* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	return div64_u64(len * NSEC_PER_SEC, q->rate);
}

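/* Worked example (added for illustration; assumes q->rate holds bytes per
 * second as configured via TCA_NETEM_RATE/TCA_NETEM_RATE64): with
 * rate = 125000 (1 Mbit/s), no overheads and no cell size, a 1500 byte packet
 * takes 1500 * NSEC_PER_SEC / 125000 = 12,000,000 ns = 12 ms of transmission
 * time, which netem_enqueue() adds on top of the configured delay.
 */
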
static void tfifo_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first(&q->t_root);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);
		rb_erase(&skb->rbnode, &q->t_root);
		rtnl_kfree_skbs(skb, skb);
	}

	rtnl_kfree_skbs(q->t_head, q->t_tail);
	q->t_head = NULL;
	q->t_tail = NULL;
}

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	u64 tnext = netem_skb_cb(nskb)->time_to_send;

	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
		if (q->t_tail)
			q->t_tail->next = nskb;
		else
			q->t_head = nskb;
		q->t_tail = nskb;
	} else {
		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

		while (*p) {
			struct sk_buff *skb;

			parent = *p;
			skb = rb_to_skb(parent);
			if (tnext >= netem_skb_cb(skb)->time_to_send)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&nskb->rbnode, parent, p);
		rb_insert_color(&nskb->rbnode, &q->t_root);
	}
	sch->q.qlen++;
}

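/* Queueing note (added for clarity): a packet whose send time is not earlier
 * than the current tail's goes onto the t_head/t_tail linear list, which is
 * the common case when jitter is zero or small; only packets that would be
 * reordered in time are inserted into the rbtree, keeping rebalancing cost
 * off the fast path.
 */
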
/* netem can't properly corrupt a megapacket (like we get from GSO), so when
 * we statistically choose to corrupt one, we instead segment it, returning
 * the first packet to be corrupted, and re-enqueue the remaining frames
 */
static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
				     struct sk_buff **to_free)
{
	struct sk_buff *segs;
	netdev_features_t features = netif_skb_features(skb);

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs)) {
		qdisc_drop(skb, sch, to_free);
		return NULL;
	}
	consume_skb(skb);
	return segs;
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	struct sk_buff *segs = NULL;
	unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
	int nb = 0;
	int count = 1;
	int rc = NET_XMIT_SUCCESS;
	int rc_drop = NET_XMIT_DROP;

	/* Do not fool qdisc_drop_all() */
	skb->prev = NULL;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			qdisc_qstats_drop(sch); /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}

	/* If a delay is expected, orphan the skb. (orphaning usually takes
	 * place at TX completion time, so _before_ the link transit delay)
	 */
	if (q->latency || q->jitter || q->rate)
		skb_orphan_partial(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

		q->duplicate = 0;
		rootq->enqueue(skb2, rootq, to_free);
		q->duplicate = dupsave;
		rc_drop = NET_XMIT_SUCCESS;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying it.
	 * If packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (skb_is_gso(skb)) {
			segs = netem_segment(skb, sch, to_free);
			if (!segs)
				return rc_drop;
		} else {
			segs = skb;
		}

		skb = segs;
		segs = segs->next;

		skb = skb_unshare(skb, GFP_ATOMIC);
		if (unlikely(!skb)) {
			qdisc_qstats_drop(sch);
			goto finish_segs;
		}
		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_help(skb)) {
			qdisc_drop(skb, sch, to_free);
			goto finish_segs;
		}

		skb->data[prandom_u32() % skb_headlen(skb)] ^=
			1<<(prandom_u32() % 8);
	}

	if (unlikely(sch->q.qlen >= sch->limit)) {
		qdisc_drop_all(skb, sch, to_free);
		return rc_drop;
	}

	qdisc_qstats_backlog_inc(sch, skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		u64 now;
		s64 delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = ktime_get_ns();

		if (q->rate) {
			struct netem_skb_cb *last = NULL;

			if (sch->q.tail)
				last = netem_skb_cb(sch->q.tail);
			if (q->t_root.rb_node) {
				struct sk_buff *t_skb;
				struct netem_skb_cb *t_last;

				t_skb = skb_rb_last(&q->t_root);
				t_last = netem_skb_cb(t_skb);
				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}
			if (q->t_tail) {
				struct netem_skb_cb *t_last =
					netem_skb_cb(q->t_tail);

				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}

			if (last) {
				/*
				 * Last packet in queue is reference point (now),
				 * calculate this time bonus and subtract
				 * from delay.
				 */
				delay -= last->time_to_send - now;
				delay = max_t(s64, 0, delay);
				now = last->time_to_send;
			}

			delay += packet_time_ns(qdisc_pkt_len(skb), q);
		}

		cb->time_to_send = now + delay;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = ktime_get_ns();
		q->counter = 0;

		__qdisc_enqueue_head(skb, &sch->q);
		sch->qstats.requeues++;
	}

finish_segs:
	if (segs) {
		while (segs) {
			skb2 = segs->next;
			skb_mark_not_on_list(segs);
			qdisc_skb_cb(segs)->pkt_len = segs->len;
			last_len = segs->len;
			rc = qdisc_enqueue(segs, sch, to_free);
			if (rc != NET_XMIT_SUCCESS) {
				if (net_xmit_drop_count(rc))
					qdisc_qstats_drop(sch);
			} else {
				nb++;
				len += last_len;
			}
			segs = skb2;
		}
		sch->q.qlen += nb;
		if (nb > 1)
			qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
	}
	return NET_XMIT_SUCCESS;
}

/* Delay the next round by scheduling a new future slot with a fresh
 * budget of bytes and packets.
 */

static void get_slot_next(struct netem_sched_data *q, u64 now)
{
	s64 next_delay;

	if (!q->slot_dist)
		next_delay = q->slot_config.min_delay +
				(prandom_u32() *
				 (q->slot_config.max_delay -
				  q->slot_config.min_delay) >> 32);
	else
		next_delay = tabledist(q->slot_config.dist_delay,
				       (s32)(q->slot_config.dist_jitter),
				       NULL, q->slot_dist);

	q->slot.slot_next = now + next_delay;
	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
}

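/* Note (added for clarity): without a slot distribution table, the next slot
 * start is drawn uniformly from [min_delay, max_delay] by scaling a 32-bit
 * random value, i.e. min_delay + ((prandom_u32() * range) >> 32); with a
 * table it is drawn via tabledist() around dist_delay with dist_jitter.
 */
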
static struct sk_buff *netem_peek(struct netem_sched_data *q)
{
	struct sk_buff *skb = skb_rb_first(&q->t_root);
	u64 t1, t2;

	if (!skb)
		return q->t_head;
	if (!q->t_head)
		return skb;

	t1 = netem_skb_cb(skb)->time_to_send;
	t2 = netem_skb_cb(q->t_head)->time_to_send;
	if (t1 < t2)
		return skb;
	return q->t_head;
}

static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
{
	if (skb == q->t_head) {
		q->t_head = skb->next;
		if (!q->t_head)
			q->t_tail = NULL;
	} else {
		rb_erase(&skb->rbnode, &q->t_root);
	}
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

tfifo_dequeue:
	skb = __qdisc_dequeue_head(&sch->q);
	if (skb) {
		qdisc_qstats_backlog_dec(sch, skb);
deliver:
		qdisc_bstats_update(sch, skb);
		return skb;
	}
	skb = netem_peek(q);
	if (skb) {
		u64 time_to_send;
		u64 now = ktime_get_ns();

		/* if more time remaining? */
		time_to_send = netem_skb_cb(skb)->time_to_send;
		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
			get_slot_next(q, now);

		if (time_to_send <= now && q->slot.slot_next <= now) {
			netem_erase_head(q, skb);
			sch->q.qlen--;
			qdisc_qstats_backlog_dec(sch, skb);
			skb->next = NULL;
			skb->prev = NULL;
			/* skb->dev shares skb->rbnode area,
			 * we need to restore its value.
			 */
			skb->dev = qdisc_dev(sch);

			if (q->slot.slot_next) {
				q->slot.packets_left--;
				q->slot.bytes_left -= qdisc_pkt_len(skb);
				if (q->slot.packets_left <= 0 ||
				    q->slot.bytes_left <= 0)
					get_slot_next(q, now);
			}

			if (q->qdisc) {
				unsigned int pkt_len = qdisc_pkt_len(skb);
				struct sk_buff *to_free = NULL;
				int err;

				err = qdisc_enqueue(skb, q->qdisc, &to_free);
				kfree_skb_list(to_free);
				if (err != NET_XMIT_SUCCESS &&
				    net_xmit_drop_count(err)) {
					qdisc_qstats_drop(sch);
					qdisc_tree_reduce_backlog(sch, 1,
								  pkt_len);
				}
				goto tfifo_dequeue;
			}
			goto deliver;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   max(time_to_send,
					       q->slot.slot_next));
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	tfifo_reset(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	kvfree(d);
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */

static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
			  const struct nlattr *attr)
{
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	spinlock_t *root_lock;
	struct disttable *d;
	int i;

	if (n > NETEM_DIST_MAX)
		return -EINVAL;

	d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	root_lock = qdisc_root_sleeping_lock(sch);

	spin_lock_bh(root_lock);
	swap(*tbl, d);
	spin_unlock_bh(root_lock);

	dist_free(d);
	return 0;
}

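/* Usage note (added for illustration; exact file names and paths vary by
 * distribution and are not taken from this file): the distribution tables are
 * generated offline and passed down by userspace via TCA_NETEM_DELAY_DIST or
 * TCA_NETEM_SLOT_DIST; iproute2's tc, for example, loads tables such as
 * normal.dist or pareto.dist when the user asks for
 * "delay 100ms 10ms distribution normal".
 */
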
static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_slot *c = nla_data(attr);

	q->slot_config = *c;
	if (q->slot_config.max_packets == 0)
		q->slot_config.max_packets = INT_MAX;
	if (q->slot_config.max_bytes == 0)
		q->slot_config.max_bytes = INT_MAX;
	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
	if (q->slot_config.min_delay | q->slot_config.max_delay |
	    q->slot_config.dist_jitter)
		q->slot.slot_next = ktime_get_ns();
	else
		q->slot.slot_next = 0;
}

static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	q->cell_overhead = r->cell_overhead;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	else
		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}

static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = TX_IN_GAP_PERIOD;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = GOOD_STATE;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
				 nested_len, policy, NULL);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct tc_netem_qopt *qopt;
	struct clgstate old_clg;
	int old_loss_model = CLG_RANDOM;
	int ret;

	if (opt == NULL)
		return -EINVAL;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	/* backup q->clg and q->loss_model */
	old_clg = q->clg;
	old_loss_model = q->loss_model;

	if (tb[TCA_NETEM_LOSS]) {
		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
		if (ret) {
			q->loss_model = old_loss_model;
			return ret;
		}
	} else {
		q->loss_model = CLG_RANDOM;
	}

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(sch, &q->delay_dist,
				     tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			goto get_table_failure;
	}

	if (tb[TCA_NETEM_SLOT_DIST]) {
		ret = get_dist_table(sch, &q->slot_dist,
				     tb[TCA_NETEM_SLOT_DIST]);
		if (ret)
			goto get_table_failure;
	}

	sch->limit = qopt->limit;

	q->latency = PSCHED_TICKS2NS(qopt->latency);
	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions.
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(q, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_REORDER])
		get_reorder(q, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(q, tb[TCA_NETEM_RATE]);

	if (tb[TCA_NETEM_RATE64])
		q->rate = max_t(u64, q->rate,
				nla_get_u64(tb[TCA_NETEM_RATE64]));

	if (tb[TCA_NETEM_LATENCY64])
		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);

	if (tb[TCA_NETEM_JITTER64])
		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);

	if (tb[TCA_NETEM_ECN])
		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

	if (tb[TCA_NETEM_SLOT])
		get_slot(q, tb[TCA_NETEM_SLOT]);

	return ret;

get_table_failure:
	/* recover clg and loss_model, in case q->clg and q->loss_model
	 * were modified in get_loss_clg()
	 */
	q->clg = old_clg;
	q->loss_model = old_loss_model;
	return ret;
}

static int netem_init(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	qdisc_watchdog_init(&q->watchdog, sch);

	if (!opt)
		return -EINVAL;

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt, extack);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_put(q->qdisc);
	dist_free(q->delay_dist);
	dist_free(q->slot_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
			goto nla_put_failure;
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
			goto nla_put_failure;
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;
	struct tc_netem_slot slot;

	qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
			     UINT_MAX);
	qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
			    UINT_MAX);
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

	if (q->rate >= (1ULL << 32)) {
		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
				      TCA_NETEM_PAD))
			goto nla_put_failure;
		rate.rate = ~0U;
	} else {
		rate.rate = q->rate;
	}
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	if (q->slot_config.min_delay | q->slot_config.max_delay |
	    q->slot_config.dist_jitter) {
		slot = q->slot_config;
		if (slot.max_packets == INT_MAX)
			slot.max_packets = 0;
		if (slot.max_bytes == INT_MAX)
			slot.max_bytes = 0;
		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
			goto nla_put_failure;
	}

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long netem_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.find		=	netem_find,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};


static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}
static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}
module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");