/*
 * net/sched/sch_netem.c	Network emulator
 *
 * 		This program is free software; you can redistribute it and/or
 * 		modify it under the terms of the GNU General Public License
 * 		as published by the Free Software Foundation; either version
 * 		2 of the License.
 *
 *  		Many of the algorithms and ideas for this came from
 *		NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */
17 #include <linux/module.h>
18 #include <linux/slab.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/errno.h>
22 #include <linux/skbuff.h>
23 #include <linux/vmalloc.h>
24 #include <linux/rtnetlink.h>
25 #include <linux/reciprocal_div.h>
26 #include <linux/rbtree.h>
28 #include <net/netlink.h>
29 #include <net/pkt_sched.h>
30 #include <net/inet_ecn.h>
/* Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------

	 This started out as a simple way to delay outgoing packets to
	 test TCP but has grown to include most of the functionality
	 of a full blown network emulator like NISTnet. It can delay
	 packets and add random jitter (and correlation). The random
	 distribution can be loaded from a table as well to provide
	 normal, Pareto, or experimental curves. Packet loss,
	 duplication, and reordering can also be emulated.

	 This qdisc does not do classification that can be handled in
	 layering other disciplines.  It does not need to do bandwidth
	 control either since that can be handled by using token
	 bucket or other rate control.

     Correlated Loss Generator models

	 Added generation of correlated loss according to the
	 "Gilbert-Elliot" model, a 4-state markov model.

	 References:
	 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	 and intuitive loss model for packet networks and its implementation
	 in the Netem module in the Linux kernel", available in [1]

	 Authors: Stefano Salsano <stefano.salsano at uniroma2.it
		  Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
/* Per-qdisc private state for netem.
 *
 * NOTE(review): this block is damaged by extraction — the original kernel
 * line numbers are fused onto each line and many declarations were dropped
 * (the child qdisc pointer, latency/jitter, loss/gap/duplicate/reorder/
 * corrupt counters, and the crndstate/loss-model-enum/clgstate/slotstate
 * definitions are missing). Tokens are kept verbatim; only comments added.
 */
76 struct netem_sched_data {
77 /* internal t(ime)fifo qdisc uses t_root and sch->limit */
78 struct rb_root t_root;
80 /* a linear queue; reduces rbtree rebalancing when jitter is low */
81 struct sk_buff *t_head;
82 struct sk_buff *t_tail;
84 /* optional qdisc for classful handling (NULL at netem init) */
87 struct qdisc_watchdog watchdog;
/* precomputed reciprocal for cell-size division in packet_time_ns() */
103 struct reciprocal_value cell_size_reciprocal;
/* correlated RNG state per feature — struct definition dropped above */
109 } delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
111 struct disttable *delay_dist;
/* enum members of the 4-state model — remainder of enum dropped */
120 TX_IN_GAP_PERIOD = 1,
123 LOST_IN_BURST_PERIOD,
131 /* Correlated Loss Generation models */
133 /* state of the Markov chain */
136 /* 4-states and Gilbert-Elliot models */
137 u32 a1; /* p13 for 4-states or p for GE */
138 u32 a2; /* p31 for 4-states or r for GE */
139 u32 a3; /* p32 for 4-states or h for GE */
140 u32 a4; /* p14 for 4-states or 1-k for GE */
141 u32 a5; /* p23 used only in 4-states */
/* slot-based delivery configuration and runtime state */
144 struct tc_netem_slot slot_config;
151 struct disttable *slot_dist;
154 /* Time stamp put into socket buffer control block
155 * Only valid when skbs are in our internal t(ime)fifo queue.
157 * As skb->rbnode uses same storage than skb->next, skb->prev and skb->tstamp,
158 * and skb->next & skb->prev are scratch space for a qdisc,
159 * we save skb->tstamp value in skb->cb[] before destroying it.
161 struct netem_skb_cb {
165 static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
167 /* we assume we can use skb next/prev/tstamp as storage for rb_node */
168 qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
169 return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
172 /* init_crandom - initialize correlated random number generator
173 * Use entropy source for initial seed.
175 static void init_crandom(struct crndstate *state, unsigned long rho)
178 state->last = prandom_u32();
181 /* get_crandom - correlated random number generator
182 * Next number depends on last value.
183 * rho is scaled to avoid floating point.
185 static u32 get_crandom(struct crndstate *state)
188 unsigned long answer;
190 if (!state || state->rho == 0) /* no correlation */
191 return prandom_u32();
193 value = prandom_u32();
194 rho = (u64)state->rho + 1;
195 answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
196 state->last = answer;
200 /* loss_4state - 4-state model loss generator
201 * Generates losses according to the 4-state Markov chain adopted in
202 * the GI (General and Intuitive) loss model.
204 static bool loss_4state(struct netem_sched_data *q)
206 struct clgstate *clg = &q->clg;
207 u32 rnd = prandom_u32();
210 * Makes a comparison between rnd and the transition
211 * probabilities outgoing from the current state, then decides the
212 * next state and if the next packet has to be transmitted or lost.
213 * The four states correspond to:
214 * TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
215 * LOST_IN_BURST_PERIOD => isolated losses within a gap period
216 * LOST_IN_GAP_PERIOD => lost packets within a burst period
217 * TX_IN_GAP_PERIOD => successfully transmitted packets within a burst period
219 switch (clg->state) {
220 case TX_IN_GAP_PERIOD:
222 clg->state = LOST_IN_BURST_PERIOD;
224 } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
225 clg->state = LOST_IN_GAP_PERIOD;
227 } else if (clg->a1 + clg->a4 < rnd) {
228 clg->state = TX_IN_GAP_PERIOD;
232 case TX_IN_BURST_PERIOD:
234 clg->state = LOST_IN_GAP_PERIOD;
237 clg->state = TX_IN_BURST_PERIOD;
241 case LOST_IN_GAP_PERIOD:
243 clg->state = TX_IN_BURST_PERIOD;
244 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
245 clg->state = TX_IN_GAP_PERIOD;
246 } else if (clg->a2 + clg->a3 < rnd) {
247 clg->state = LOST_IN_GAP_PERIOD;
251 case LOST_IN_BURST_PERIOD:
252 clg->state = TX_IN_GAP_PERIOD;
259 /* loss_gilb_ell - Gilbert-Elliot model loss generator
260 * Generates losses according to the Gilbert-Elliot loss model or
261 * its special cases (Gilbert or Simple Gilbert)
263 * Makes a comparison between random number and the transition
264 * probabilities outgoing from the current state, then decides the
265 * next state. A second random number is extracted and the comparison
266 * with the loss probability of the current state decides if the next
267 * packet will be transmitted or lost.
269 static bool loss_gilb_ell(struct netem_sched_data *q)
271 struct clgstate *clg = &q->clg;
273 switch (clg->state) {
275 if (prandom_u32() < clg->a1)
276 clg->state = BAD_STATE;
277 if (prandom_u32() < clg->a4)
281 if (prandom_u32() < clg->a2)
282 clg->state = GOOD_STATE;
283 if (prandom_u32() > clg->a3)
290 static bool loss_event(struct netem_sched_data *q)
292 switch (q->loss_model) {
294 /* Random packet drop 0 => none, ~0 => all */
295 return q->loss && q->loss >= get_crandom(&q->loss_cor);
298 /* 4state loss model algorithm (used also for GI model)
299 * Extracts a value from the markov 4 state loss generator,
300 * if it is 1 drops a packet and if needed writes the event in
303 return loss_4state(q);
306 /* Gilbert-Elliot loss model algorithm
307 * Extracts a value from the Gilbert-Elliot loss generator,
308 * if it is 1 drops a packet and if needed writes the event in
311 return loss_gilb_ell(q);
314 return false; /* not reached */
318 /* tabledist - return a pseudo-randomly distributed value with mean mu and
319 * std deviation sigma. Uses table lookup to approximate the desired
320 * distribution, and a uniformly-distributed pseudo-random source.
322 static s64 tabledist(s64 mu, s32 sigma,
323 struct crndstate *state,
324 const struct disttable *dist)
333 rnd = get_crandom(state);
335 /* default uniform distribution */
337 return ((rnd % (2 * sigma)) + mu) - sigma;
339 t = dist->table[rnd % dist->size];
340 x = (sigma % NETEM_DIST_SCALE) * t;
342 x += NETEM_DIST_SCALE/2;
344 x -= NETEM_DIST_SCALE/2;
346 return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
349 static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
351 len += q->packet_overhead;
354 u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
356 if (len > cells * q->cell_size) /* extra cell needed for remainder */
358 len = cells * (q->cell_size + q->cell_overhead);
361 return div64_u64(len * NSEC_PER_SEC, q->rate);
364 static void tfifo_reset(struct Qdisc *sch)
366 struct netem_sched_data *q = qdisc_priv(sch);
367 struct rb_node *p = rb_first(&q->t_root);
370 struct sk_buff *skb = rb_to_skb(p);
373 rb_erase(&skb->rbnode, &q->t_root);
374 rtnl_kfree_skbs(skb, skb);
377 rtnl_kfree_skbs(q->t_head, q->t_tail);
382 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
384 struct netem_sched_data *q = qdisc_priv(sch);
385 u64 tnext = netem_skb_cb(nskb)->time_to_send;
387 if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
389 q->t_tail->next = nskb;
394 struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
400 skb = rb_to_skb(parent);
401 if (tnext >= netem_skb_cb(skb)->time_to_send)
402 p = &parent->rb_right;
404 p = &parent->rb_left;
406 rb_link_node(&nskb->rbnode, parent, p);
407 rb_insert_color(&nskb->rbnode, &q->t_root);
412 /* netem can't properly corrupt a megapacket (like we get from GSO), so instead
413 * when we statistically choose to corrupt one, we instead segment it, returning
414 * the first packet to be corrupted, and re-enqueue the remaining frames
416 static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
417 struct sk_buff **to_free)
419 struct sk_buff *segs;
420 netdev_features_t features = netif_skb_features(skb);
422 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
424 if (IS_ERR_OR_NULL(segs)) {
425 qdisc_drop(skb, sch, to_free);
/*
 * NOTE(review): this function is damaged by extraction — the original
 * kernel line numbers are fused onto each line, and many lines were
 * dropped (declarations of count/delay/now, several closing braces,
 * the loss_event() call, the corruption/segs merge logic, and parts of
 * the reorder and GSO re-enqueue paths). Tokens are kept verbatim;
 * only comments were added. Restore from the upstream source before use.
 */
433 * Insert one skb into qdisc.
434 * Note: parent depends on return value to account for queue length.
435 * NET_XMIT_DROP: queue length didn't change.
436 * NET_XMIT_SUCCESS: one skb was queued.
438 static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
439 struct sk_buff **to_free)
441 struct netem_sched_data *q = qdisc_priv(sch);
442 /* We don't fill cb now as skb_unshare() may invalidate it */
443 struct netem_skb_cb *cb;
444 struct sk_buff *skb2;
445 struct sk_buff *segs = NULL;
446 unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
449 int rc = NET_XMIT_SUCCESS;
450 int rc_drop = NET_XMIT_DROP;
452 /* Do not fool qdisc_drop_all() */
455 /* Random duplication */
456 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
/* loss decision: ECN-capable packets are marked instead of dropped */
461 if (q->ecn && INET_ECN_set_ce(skb))
462 qdisc_qstats_drop(sch); /* mark packet */
467 qdisc_qstats_drop(sch);
468 __qdisc_drop(skb, to_free);
469 return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
472 /* If a delay is expected, orphan the skb. (orphaning usually takes
473 * place at TX completion time, so _before_ the link transit delay)
475 if (q->latency || q->jitter || q->rate)
476 skb_orphan_partial(skb);
479 * If we need to duplicate packet, then re-insert at top of the
480 * qdisc tree, since parent queuer expects that only one
481 * skb will be queued.
483 if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
484 struct Qdisc *rootq = qdisc_root(sch);
485 u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
488 rootq->enqueue(skb2, rootq, to_free);
489 q->duplicate = dupsave;
490 rc_drop = NET_XMIT_SUCCESS;
494 * Randomized packet corruption.
495 * Make copy if needed since we are modifying
496 * If packet is going to be hardware checksummed, then
497 * do it now in software before we mangle it.
499 if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
500 if (skb_is_gso(skb)) {
501 segs = netem_segment(skb, sch, to_free);
511 skb = skb_unshare(skb, GFP_ATOMIC);
512 if (unlikely(!skb)) {
513 qdisc_qstats_drop(sch);
516 if (skb->ip_summed == CHECKSUM_PARTIAL &&
517 skb_checksum_help(skb)) {
518 qdisc_drop(skb, sch, to_free);
/* flip one random bit somewhere in the linear header area */
522 skb->data[prandom_u32() % skb_headlen(skb)] ^=
523 1<<(prandom_u32() % 8);
526 if (unlikely(sch->q.qlen >= sch->limit)) {
527 qdisc_drop_all(skb, sch, to_free);
531 qdisc_qstats_backlog_inc(sch, skb);
533 cb = netem_skb_cb(skb);
534 if (q->gap == 0 || /* not doing reordering */
535 q->counter < q->gap - 1 || /* inside last reordering gap */
536 q->reorder < get_crandom(&q->reorder_cor)) {
540 delay = tabledist(q->latency, q->jitter,
541 &q->delay_cor, q->delay_dist);
543 now = ktime_get_ns();
/* rate shaping: chain this packet behind the queue's last deadline */
546 struct netem_skb_cb *last = NULL;
549 last = netem_skb_cb(sch->q.tail);
550 if (q->t_root.rb_node) {
551 struct sk_buff *t_skb;
552 struct netem_skb_cb *t_last;
554 t_skb = skb_rb_last(&q->t_root);
555 t_last = netem_skb_cb(t_skb);
557 t_last->time_to_send > last->time_to_send)
561 struct netem_skb_cb *t_last =
562 netem_skb_cb(q->t_tail);
565 t_last->time_to_send > last->time_to_send)
571 * Last packet in queue is reference point (now),
572 * calculate this time bonus and subtract
575 delay -= last->time_to_send - now;
576 delay = max_t(s64, 0, delay);
577 now = last->time_to_send;
580 delay += packet_time_ns(qdisc_pkt_len(skb), q);
583 cb->time_to_send = now + delay;
585 tfifo_enqueue(skb, sch);
588 * Do re-ordering by putting one out of N packets at the front
591 cb->time_to_send = ktime_get_ns();
594 __qdisc_enqueue_head(skb, &sch->q);
595 sch->qstats.requeues++;
/* re-enqueue the remaining GSO segments individually */
602 skb_mark_not_on_list(segs);
603 qdisc_skb_cb(segs)->pkt_len = segs->len;
604 last_len = segs->len;
605 rc = qdisc_enqueue(segs, sch, to_free);
606 if (rc != NET_XMIT_SUCCESS) {
607 if (net_xmit_drop_count(rc))
608 qdisc_qstats_drop(sch);
617 qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
619 return NET_XMIT_SUCCESS;
622 /* Delay the next round with a new future slot with a
623 * correct number of bytes and packets.
626 static void get_slot_next(struct netem_sched_data *q, u64 now)
631 next_delay = q->slot_config.min_delay +
633 (q->slot_config.max_delay -
634 q->slot_config.min_delay) >> 32);
636 next_delay = tabledist(q->slot_config.dist_delay,
637 (s32)(q->slot_config.dist_jitter),
640 q->slot.slot_next = now + next_delay;
641 q->slot.packets_left = q->slot_config.max_packets;
642 q->slot.bytes_left = q->slot_config.max_bytes;
645 static struct sk_buff *netem_peek(struct netem_sched_data *q)
647 struct sk_buff *skb = skb_rb_first(&q->t_root);
655 t1 = netem_skb_cb(skb)->time_to_send;
656 t2 = netem_skb_cb(q->t_head)->time_to_send;
662 static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
664 if (skb == q->t_head) {
665 q->t_head = skb->next;
669 rb_erase(&skb->rbnode, &q->t_root);
/*
 * NOTE(review): this function is damaged by extraction — the original
 * kernel line numbers are fused onto each line and many lines were
 * dropped (the tfifo_dequeue label, the netem_peek() call, several
 * closing braces, the child-qdisc branch scaffolding and the watchdog
 * deadline argument). Tokens are kept verbatim; only comments added.
 * Restore from the upstream source before use.
 */
673 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
675 struct netem_sched_data *q = qdisc_priv(sch);
/* first drain packets that were re-queued at the head for reordering */
679 skb = __qdisc_dequeue_head(&sch->q);
681 qdisc_qstats_backlog_dec(sch, skb);
683 qdisc_bstats_update(sch, skb);
689 u64 now = ktime_get_ns();
691 /* if more time remaining? */
692 time_to_send = netem_skb_cb(skb)->time_to_send;
693 if (q->slot.slot_next && q->slot.slot_next < time_to_send)
694 get_slot_next(q, now);
696 if (time_to_send <= now && q->slot.slot_next <= now) {
697 netem_erase_head(q, skb);
699 qdisc_qstats_backlog_dec(sch, skb);
702 /* skb->dev shares skb->rbnode area,
703 * we need to restore its value.
705 skb->dev = qdisc_dev(sch);
707 if (q->slot.slot_next) {
708 q->slot.packets_left--;
709 q->slot.bytes_left -= qdisc_pkt_len(skb);
710 if (q->slot.packets_left <= 0 ||
711 q->slot.bytes_left <= 0)
712 get_slot_next(q, now);
/* hand the ripe packet to the optional child qdisc, if configured */
716 unsigned int pkt_len = qdisc_pkt_len(skb);
717 struct sk_buff *to_free = NULL;
720 err = qdisc_enqueue(skb, q->qdisc, &to_free);
721 kfree_skb_list(to_free);
722 if (err != NET_XMIT_SUCCESS &&
723 net_xmit_drop_count(err)) {
724 qdisc_qstats_drop(sch);
725 qdisc_tree_reduce_backlog(sch, 1,
734 skb = q->qdisc->ops->dequeue(q->qdisc);
/* nothing ripe yet: arm the watchdog for the next deadline */
739 qdisc_watchdog_schedule_ns(&q->watchdog,
745 skb = q->qdisc->ops->dequeue(q->qdisc);
752 static void netem_reset(struct Qdisc *sch)
754 struct netem_sched_data *q = qdisc_priv(sch);
756 qdisc_reset_queue(sch);
759 qdisc_reset(q->qdisc);
760 qdisc_watchdog_cancel(&q->watchdog);
763 static void dist_free(struct disttable *d)
769 * Distribution data is a variable size payload containing
770 * signed 16 bit values.
773 static int get_dist_table(struct Qdisc *sch, struct disttable **tbl,
774 const struct nlattr *attr)
776 size_t n = nla_len(attr)/sizeof(__s16);
777 const __s16 *data = nla_data(attr);
778 spinlock_t *root_lock;
782 if (n > NETEM_DIST_MAX)
785 d = kvmalloc(sizeof(struct disttable) + n * sizeof(s16), GFP_KERNEL);
790 for (i = 0; i < n; i++)
791 d->table[i] = data[i];
793 root_lock = qdisc_root_sleeping_lock(sch);
795 spin_lock_bh(root_lock);
797 spin_unlock_bh(root_lock);
803 static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
805 const struct tc_netem_slot *c = nla_data(attr);
808 if (q->slot_config.max_packets == 0)
809 q->slot_config.max_packets = INT_MAX;
810 if (q->slot_config.max_bytes == 0)
811 q->slot_config.max_bytes = INT_MAX;
812 q->slot.packets_left = q->slot_config.max_packets;
813 q->slot.bytes_left = q->slot_config.max_bytes;
814 if (q->slot_config.min_delay | q->slot_config.max_delay |
815 q->slot_config.dist_jitter)
816 q->slot.slot_next = ktime_get_ns();
818 q->slot.slot_next = 0;
821 static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
823 const struct tc_netem_corr *c = nla_data(attr);
825 init_crandom(&q->delay_cor, c->delay_corr);
826 init_crandom(&q->loss_cor, c->loss_corr);
827 init_crandom(&q->dup_cor, c->dup_corr);
830 static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
832 const struct tc_netem_reorder *r = nla_data(attr);
834 q->reorder = r->probability;
835 init_crandom(&q->reorder_cor, r->correlation);
838 static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
840 const struct tc_netem_corrupt *r = nla_data(attr);
842 q->corrupt = r->probability;
843 init_crandom(&q->corrupt_cor, r->correlation);
846 static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
848 const struct tc_netem_rate *r = nla_data(attr);
851 q->packet_overhead = r->packet_overhead;
852 q->cell_size = r->cell_size;
853 q->cell_overhead = r->cell_overhead;
855 q->cell_size_reciprocal = reciprocal_value(q->cell_size);
857 q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
860 static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
862 const struct nlattr *la;
865 nla_for_each_nested(la, attr, rem) {
866 u16 type = nla_type(la);
869 case NETEM_LOSS_GI: {
870 const struct tc_netem_gimodel *gi = nla_data(la);
872 if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
873 pr_info("netem: incorrect gi model size\n");
877 q->loss_model = CLG_4_STATES;
879 q->clg.state = TX_IN_GAP_PERIOD;
888 case NETEM_LOSS_GE: {
889 const struct tc_netem_gemodel *ge = nla_data(la);
891 if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
892 pr_info("netem: incorrect ge model size\n");
896 q->loss_model = CLG_GILB_ELL;
897 q->clg.state = GOOD_STATE;
906 pr_info("netem: unknown loss type %u\n", type);
914 static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
915 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
916 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
917 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
918 [TCA_NETEM_RATE] = { .len = sizeof(struct tc_netem_rate) },
919 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
920 [TCA_NETEM_ECN] = { .type = NLA_U32 },
921 [TCA_NETEM_RATE64] = { .type = NLA_U64 },
922 [TCA_NETEM_LATENCY64] = { .type = NLA_S64 },
923 [TCA_NETEM_JITTER64] = { .type = NLA_S64 },
924 [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) },
927 static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
928 const struct nla_policy *policy, int len)
930 int nested_len = nla_len(nla) - NLA_ALIGN(len);
932 if (nested_len < 0) {
933 pr_info("netem: invalid attributes len %d\n", nested_len);
937 if (nested_len >= nla_attr_size(0))
938 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
939 nested_len, policy, NULL);
941 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
/*
 * NOTE(review): this function is damaged by extraction — the original
 * kernel line numbers are fused onto each line and several lines were
 * dropped (the !opt check, error-return branches after parse_attr and
 * get_loss_clg, the clg backup assignment, q->gap/q->counter updates,
 * and the get_table_failure label block). Tokens are kept verbatim;
 * only comments added. Restore from the upstream source before use.
 */
945 /* Parse netlink message to set options */
946 static int netem_change(struct Qdisc *sch, struct nlattr *opt,
947 struct netlink_ext_ack *extack)
949 struct netem_sched_data *q = qdisc_priv(sch);
950 struct nlattr *tb[TCA_NETEM_MAX + 1];
951 struct tc_netem_qopt *qopt;
952 struct clgstate old_clg;
953 int old_loss_model = CLG_RANDOM;
959 qopt = nla_data(opt);
960 ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
964 /* backup q->clg and q->loss_model */
966 old_loss_model = q->loss_model;
968 if (tb[TCA_NETEM_LOSS]) {
969 ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
971 q->loss_model = old_loss_model;
975 q->loss_model = CLG_RANDOM;
978 if (tb[TCA_NETEM_DELAY_DIST]) {
979 ret = get_dist_table(sch, &q->delay_dist,
980 tb[TCA_NETEM_DELAY_DIST]);
982 goto get_table_failure;
985 if (tb[TCA_NETEM_SLOT_DIST]) {
986 ret = get_dist_table(sch, &q->slot_dist,
987 tb[TCA_NETEM_SLOT_DIST]);
989 goto get_table_failure;
992 sch->limit = qopt->limit;
994 q->latency = PSCHED_TICKS2NS(qopt->latency);
995 q->jitter = PSCHED_TICKS2NS(qopt->jitter);
996 q->limit = qopt->limit;
999 q->loss = qopt->loss;
1000 q->duplicate = qopt->duplicate;
1002 /* for compatibility with earlier versions.
1003 * if gap is set, need to assume 100% probability
1008 if (tb[TCA_NETEM_CORR])
1009 get_correlation(q, tb[TCA_NETEM_CORR]);
1011 if (tb[TCA_NETEM_REORDER])
1012 get_reorder(q, tb[TCA_NETEM_REORDER]);
1014 if (tb[TCA_NETEM_CORRUPT])
1015 get_corrupt(q, tb[TCA_NETEM_CORRUPT]);
1017 if (tb[TCA_NETEM_RATE])
1018 get_rate(q, tb[TCA_NETEM_RATE]);
/* 64-bit attributes take precedence over the 32-bit legacy fields */
1020 if (tb[TCA_NETEM_RATE64])
1021 q->rate = max_t(u64, q->rate,
1022 nla_get_u64(tb[TCA_NETEM_RATE64]));
1024 if (tb[TCA_NETEM_LATENCY64])
1025 q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);
1027 if (tb[TCA_NETEM_JITTER64])
1028 q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);
1030 if (tb[TCA_NETEM_ECN])
1031 q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);
1033 if (tb[TCA_NETEM_SLOT])
1034 get_slot(q, tb[TCA_NETEM_SLOT]);
1039 /* recover clg and loss_model, in case of
1040 * q->clg and q->loss_model were modified
1044 q->loss_model = old_loss_model;
1048 static int netem_init(struct Qdisc *sch, struct nlattr *opt,
1049 struct netlink_ext_ack *extack)
1051 struct netem_sched_data *q = qdisc_priv(sch);
1054 qdisc_watchdog_init(&q->watchdog, sch);
1059 q->loss_model = CLG_RANDOM;
1060 ret = netem_change(sch, opt, extack);
1062 pr_info("netem: change failed\n");
1066 static void netem_destroy(struct Qdisc *sch)
1068 struct netem_sched_data *q = qdisc_priv(sch);
1070 qdisc_watchdog_cancel(&q->watchdog);
1072 qdisc_put(q->qdisc);
1073 dist_free(q->delay_dist);
1074 dist_free(q->slot_dist);
1077 static int dump_loss_model(const struct netem_sched_data *q,
1078 struct sk_buff *skb)
1080 struct nlattr *nest;
1082 nest = nla_nest_start(skb, TCA_NETEM_LOSS);
1084 goto nla_put_failure;
1086 switch (q->loss_model) {
1088 /* legacy loss model */
1089 nla_nest_cancel(skb, nest);
1090 return 0; /* no data */
1092 case CLG_4_STATES: {
1093 struct tc_netem_gimodel gi = {
1101 if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
1102 goto nla_put_failure;
1105 case CLG_GILB_ELL: {
1106 struct tc_netem_gemodel ge = {
1113 if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
1114 goto nla_put_failure;
1119 nla_nest_end(skb, nest);
1123 nla_nest_cancel(skb, nest);
1127 static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
1129 const struct netem_sched_data *q = qdisc_priv(sch);
1130 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
1131 struct tc_netem_qopt qopt;
1132 struct tc_netem_corr cor;
1133 struct tc_netem_reorder reorder;
1134 struct tc_netem_corrupt corrupt;
1135 struct tc_netem_rate rate;
1136 struct tc_netem_slot slot;
1138 qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
1140 qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
1142 qopt.limit = q->limit;
1143 qopt.loss = q->loss;
1145 qopt.duplicate = q->duplicate;
1146 if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
1147 goto nla_put_failure;
1149 if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
1150 goto nla_put_failure;
1152 if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
1153 goto nla_put_failure;
1155 cor.delay_corr = q->delay_cor.rho;
1156 cor.loss_corr = q->loss_cor.rho;
1157 cor.dup_corr = q->dup_cor.rho;
1158 if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
1159 goto nla_put_failure;
1161 reorder.probability = q->reorder;
1162 reorder.correlation = q->reorder_cor.rho;
1163 if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
1164 goto nla_put_failure;
1166 corrupt.probability = q->corrupt;
1167 corrupt.correlation = q->corrupt_cor.rho;
1168 if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
1169 goto nla_put_failure;
1171 if (q->rate >= (1ULL << 32)) {
1172 if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
1174 goto nla_put_failure;
1177 rate.rate = q->rate;
1179 rate.packet_overhead = q->packet_overhead;
1180 rate.cell_size = q->cell_size;
1181 rate.cell_overhead = q->cell_overhead;
1182 if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
1183 goto nla_put_failure;
1185 if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
1186 goto nla_put_failure;
1188 if (dump_loss_model(q, skb) != 0)
1189 goto nla_put_failure;
1191 if (q->slot_config.min_delay | q->slot_config.max_delay |
1192 q->slot_config.dist_jitter) {
1193 slot = q->slot_config;
1194 if (slot.max_packets == INT_MAX)
1195 slot.max_packets = 0;
1196 if (slot.max_bytes == INT_MAX)
1198 if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
1199 goto nla_put_failure;
1202 return nla_nest_end(skb, nla);
1205 nlmsg_trim(skb, nla);
1209 static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
1210 struct sk_buff *skb, struct tcmsg *tcm)
1212 struct netem_sched_data *q = qdisc_priv(sch);
1214 if (cl != 1 || !q->qdisc) /* only one class */
1217 tcm->tcm_handle |= TC_H_MIN(1);
1218 tcm->tcm_info = q->qdisc->handle;
1223 static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1224 struct Qdisc **old, struct netlink_ext_ack *extack)
1226 struct netem_sched_data *q = qdisc_priv(sch);
1228 *old = qdisc_replace(sch, new, &q->qdisc);
1232 static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
1234 struct netem_sched_data *q = qdisc_priv(sch);
1238 static unsigned long netem_find(struct Qdisc *sch, u32 classid)
1243 static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
1245 if (!walker->stop) {
1246 if (walker->count >= walker->skip)
1247 if (walker->fn(sch, 1, walker) < 0) {
1255 static const struct Qdisc_class_ops netem_class_ops = {
1256 .graft = netem_graft,
1260 .dump = netem_dump_class,
1263 static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
1265 .cl_ops = &netem_class_ops,
1266 .priv_size = sizeof(struct netem_sched_data),
1267 .enqueue = netem_enqueue,
1268 .dequeue = netem_dequeue,
1269 .peek = qdisc_peek_dequeued,
1271 .reset = netem_reset,
1272 .destroy = netem_destroy,
1273 .change = netem_change,
1275 .owner = THIS_MODULE,
1279 static int __init netem_module_init(void)
1281 pr_info("netem: version " VERSION "\n");
1282 return register_qdisc(&netem_qdisc_ops);
1284 static void __exit netem_module_exit(void)
1286 unregister_qdisc(&netem_qdisc_ops);
1288 module_init(netem_module_init)
1289 module_exit(netem_module_exit)
1290 MODULE_LICENSE("GPL");