// SPDX-License-Identifier: GPL-2.0-only
/*
 * net/sched/sch_netem.c	Network emulator
 *
 * Many of the algorithms and ideas for this came from
 * NIST Net which is not copyrighted.
 *
 * Authors:	Stephen Hemminger <shemminger@osdl.org>
 *		Catalin(ux aka Dino) BOIE <catab at umbrella dot ro>
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/rtnetlink.h>
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>

#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>

#define VERSION "1.3"
/* Network Emulation Queuing algorithm.
	====================================

	Sources: [1] Mark Carson, Darrin Santay, "NIST Net - A Linux-based
		 Network Emulation Tool
		 [2] Luigi Rizzo, DummyNet for FreeBSD

	 ----------------------------------------------------------------
	This started out as a simple way to delay outgoing packets to
	test TCP but has grown to include most of the functionality
	of a full blown network emulator like NISTnet. It can delay
	packets and add random jitter (and correlation). The random
	distribution can also be loaded from a table, to provide
	normal, Pareto, or experimental curves. Packet loss,
	duplication, and reordering can also be emulated.

	This qdisc does not do classification; that can be handled by
	layering other disciplines on top of it. Nor does it need to do
	bandwidth control, since that can be handled by using token
	bucket or other rate control.
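
	For illustration, a single tc(8) invocation can combine several
	impairments (iproute2 syntax; device name and numbers are only
	examples):

	    tc qdisc add dev eth0 root netem delay 100ms 10ms 25% \
		loss 0.3% 25% duplicate 1% corrupt 0.1% reorder 25% 50%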

	Correlated Loss Generator models

	Added generation of correlated loss according to the
	"Gilbert-Elliot" model, a 4-state Markov model.
	References:
	[1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
	[2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
	and intuitive loss model for packet networks and its implementation
	in the Netem module in the Linux kernel", available in [1]

	Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
		 Fabio Ludovici <fabio.ludovici at yahoo.it>
*/
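
/* Illustrative front end usage (iproute2 syntax; the percentages are
 * invented for demonstration): the correlated-loss models are selected
 * with "loss state" (4-state) or "loss gemodel" (Gilbert-Elliot):
 *
 *	tc qdisc add dev eth0 root netem loss state 1% 10% 20% 5% 1%
 *	tc qdisc add dev eth0 root netem loss gemodel 1% 10% 70% 0.1%
 */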

struct disttable {
	u32  size;
	s16 table[];
};

struct netem_sched_data {
	/* internal t(ime)fifo qdisc uses t_root and sch->limit */
	struct rb_root t_root;

	/* a linear queue; reduces rbtree rebalancing when jitter is low */
	struct sk_buff	*t_head;
	struct sk_buff	*t_tail;

	/* optional qdisc for classful handling (NULL at netem init) */
	struct Qdisc	*qdisc;

	struct qdisc_watchdog watchdog;

	s64 latency;
	s64 jitter;

	u32 loss;
	u32 ecn;
	u32 limit;
	u32 counter;
	u32 gap;
	u32 duplicate;
	u32 reorder;
	u32 corrupt;
	u64 rate;
	s32 packet_overhead;
	u32 cell_size;
	struct reciprocal_value cell_size_reciprocal;
	s32 cell_overhead;

	struct crndstate {
		u32 last;
		u32 rho;
	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;

	struct disttable *delay_dist;

	enum  {
		CLG_RANDOM,
		CLG_4_STATES,
		CLG_GILB_ELL,
	} loss_model;

	enum {
		TX_IN_GAP_PERIOD = 1,
		TX_IN_BURST_PERIOD,
		LOST_IN_GAP_PERIOD,
		LOST_IN_BURST_PERIOD,
	} _4_state_model;

	enum {
		GOOD_STATE = 1,
		BAD_STATE,
	} GE_state_model;

	/* Correlated Loss Generation models */
	struct clgstate {
		/* state of the Markov chain */
		u8 state;

		/* 4-states and Gilbert-Elliot models */
		u32 a1;	/* p13 for 4-states or p for GE */
		u32 a2;	/* p31 for 4-states or r for GE */
		u32 a3;	/* p32 for 4-states or h for GE */
		u32 a4;	/* p14 for 4-states or 1-k for GE */
		u32 a5; /* p23 used only in 4-states */
	} clg;

	struct tc_netem_slot	slot_config;
	struct slotstate {
		u64 slot_next;
		s32 packets_left;
		s32 bytes_left;
	} slot;

	struct disttable *slot_dist;
};

/* Time stamp put into socket buffer control block
 * Only valid when skbs are in our internal t(ime)fifo queue.
 *
 * As skb->rbnode uses the same storage as skb->next, skb->prev and
 * skb->tstamp, and skb->next & skb->prev are scratch space for a qdisc,
 * we save skb->tstamp value in skb->cb[] before destroying it.
 */
struct netem_skb_cb {
	u64	        time_to_send;
};

static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
{
	/* we assume we can use skb next/prev/tstamp as storage for rb_node */
	qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
	return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
}

/* init_crandom - initialize correlated random number generator
 * Use entropy source for initial seed.
 */
static void init_crandom(struct crndstate *state, unsigned long rho)
{
	state->rho = rho;
	state->last = get_random_u32();
}

/* get_crandom - correlated random number generator
 * Next number depends on last value.
 * rho is scaled to avoid floating point.
 */
static u32 get_crandom(struct crndstate *state)
{
	u64 value, rho;
	unsigned long answer;

	if (!state || state->rho == 0)	/* no correlation */
		return get_random_u32();

	value = get_random_u32();
	rho = (u64)state->rho + 1;
	answer = (value * ((1ull<<32) - rho) + state->last * rho) >> 32;
	state->last = answer;
	return answer;
}
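
/* The update above is a fixed-point convex combination: with rho taken
 * as a fraction of 2^32,
 *
 *	answer = (1 - rho) * value + rho * last
 *
 * For example, rho = 0x80000000 (~50% correlation) makes each output
 * roughly the average of a fresh random draw and the previous output.
 */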

/* loss_4state - 4-state model loss generator
 * Generates losses according to the 4-state Markov chain adopted in
 * the GI (General and Intuitive) loss model.
 */
static bool loss_4state(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;
	u32 rnd = get_random_u32();

	/*
	 * Makes a comparison between rnd and the transition
	 * probabilities outgoing from the current state, then decides the
	 * next state, and whether the next packet is transmitted or lost.
	 * The four states correspond to:
	 *   TX_IN_GAP_PERIOD => successfully transmitted packets within a gap period
	 *   LOST_IN_GAP_PERIOD => isolated losses within a gap period
	 *   LOST_IN_BURST_PERIOD => lost packets within a burst period
	 *   TX_IN_BURST_PERIOD => successfully transmitted packets within a burst period
	 */
	switch (clg->state) {
	case TX_IN_GAP_PERIOD:
		if (rnd < clg->a4) {
			clg->state = LOST_IN_GAP_PERIOD;
			return true;
		} else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else if (clg->a1 + clg->a4 < rnd) {
			clg->state = TX_IN_GAP_PERIOD;
		}
		break;
	case TX_IN_BURST_PERIOD:
		if (rnd < clg->a5) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		} else {
			clg->state = TX_IN_BURST_PERIOD;
		}
		break;
	case LOST_IN_BURST_PERIOD:
		if (rnd < clg->a3)
			clg->state = TX_IN_BURST_PERIOD;
		else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
			clg->state = TX_IN_GAP_PERIOD;
		} else if (clg->a2 + clg->a3 < rnd) {
			clg->state = LOST_IN_BURST_PERIOD;
			return true;
		}
		break;
	case LOST_IN_GAP_PERIOD:
		clg->state = TX_IN_GAP_PERIOD;
		break;
	}

	return false;
}
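
/* The transition probabilities a1..a5 arrive from userspace already in
 * fixed point, as fractions of 2^32 (~0U means 100%); e.g. the iproute2
 * front end encodes "1%" as about 42949673 before it reaches
 * get_loss_clg() below.
 */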

/* loss_gilb_ell - Gilbert-Elliot model loss generator
 * Generates losses according to the Gilbert-Elliot loss model or
 * its special cases (Gilbert or Simple Gilbert)
 *
 * Makes a comparison between a random number and the transition
 * probabilities outgoing from the current state, then decides the
 * next state. A second random number is extracted and the comparison
 * with the loss probability of the current state decides if the next
 * packet will be transmitted or lost.
 */
static bool loss_gilb_ell(struct netem_sched_data *q)
{
	struct clgstate *clg = &q->clg;

	switch (clg->state) {
	case GOOD_STATE:
		if (get_random_u32() < clg->a1)
			clg->state = BAD_STATE;
		if (get_random_u32() < clg->a4)
			return true;
		break;
	case BAD_STATE:
		if (get_random_u32() < clg->a2)
			clg->state = GOOD_STATE;
		if (get_random_u32() > clg->a3)
			return true;
	}

	return false;
}
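
/* In GOOD_STATE a packet is lost with probability a4 (1-k); in BAD_STATE
 * it survives only with probability a3 (h). Setting a4 = 0 and a3 = 0
 * therefore gives the classic Simple Gilbert behaviour: no losses in the
 * good state, every packet lost in the bad state.
 */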

static bool loss_event(struct netem_sched_data *q)
{
	switch (q->loss_model) {
	case CLG_RANDOM:
		/* Random packet drop 0 => none, ~0 => all */
		return q->loss && q->loss >= get_crandom(&q->loss_cor);

	case CLG_4_STATES:
		/* 4-state loss model algorithm (used also for the GI model):
		 * a true return from the Markov 4-state loss generator
		 * means this packet is dropped.
		 */
		return loss_4state(q);

	case CLG_GILB_ELL:
		/* Gilbert-Elliot loss model algorithm: a true return from
		 * the Gilbert-Elliot loss generator means this packet is
		 * dropped.
		 */
		return loss_gilb_ell(q);
	}

	return false;	/* not reached */
}

/* tabledist - return a pseudo-randomly distributed value with mean mu and
 * std deviation sigma.  Uses table lookup to approximate the desired
 * distribution, and a uniformly-distributed pseudo-random source.
 */
static s64 tabledist(s64 mu, s32 sigma,
		     struct crndstate *state,
		     const struct disttable *dist)
{
	s64 x;
	long t;
	u32 rnd;

	if (sigma == 0)
		return mu;

	rnd = get_crandom(state);

	/* default uniform distribution */
	if (dist == NULL)
		return ((rnd % (2 * (u32)sigma)) + mu) - sigma;

	t = dist->table[rnd % dist->size];
	x = (sigma % NETEM_DIST_SCALE) * t;
	if (x >= 0)
		x += NETEM_DIST_SCALE/2;
	else
		x -= NETEM_DIST_SCALE/2;

	return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
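
/* The table holds samples of the inverse CDF scaled by NETEM_DIST_SCALE
 * (8192); sigma is split into quotient and remainder so sigma * t never
 * overflows. Worked example: sigma = 10000 and t = 8192 (one standard
 * deviation) give (10000 % 8192) * 8192 / 8192 = 1808 plus
 * (10000 / 8192) * 8192 = 8192, i.e. mu + 10000 as expected.
 */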

static u64 packet_time_ns(u64 len, const struct netem_sched_data *q)
{
	len += q->packet_overhead;

	if (q->cell_size) {
		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);

		if (len > cells * q->cell_size) /* extra cell needed for remainder */
			cells++;
		len = cells * (q->cell_size + q->cell_overhead);
	}

	return div64_u64(len * NSEC_PER_SEC, q->rate);
}
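
/* Example of the cell accounting (values purely illustrative): with
 * packet_overhead = 4, cell_size = 48 and cell_overhead = 5, a 100 byte
 * packet becomes 104 bytes, occupies ceil(104/48) = 3 cells, and is
 * charged 3 * (48 + 5) = 159 bytes of transmit time.
 */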

static void tfifo_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct rb_node *p = rb_first(&q->t_root);

	while (p) {
		struct sk_buff *skb = rb_to_skb(p);

		p = rb_next(p);
		rb_erase(&skb->rbnode, &q->t_root);
		rtnl_kfree_skbs(skb, skb);
	}

	rtnl_kfree_skbs(q->t_head, q->t_tail);
	q->t_head = NULL;
	q->t_tail = NULL;
}

static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	u64 tnext = netem_skb_cb(nskb)->time_to_send;

	if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
		if (q->t_tail)
			q->t_tail->next = nskb;
		else
			q->t_head = nskb;
		q->t_tail = nskb;
	} else {
		struct rb_node **p = &q->t_root.rb_node, *parent = NULL;

		while (*p) {
			struct sk_buff *skb;

			parent = *p;
			skb = rb_to_skb(parent);
			if (tnext >= netem_skb_cb(skb)->time_to_send)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&nskb->rbnode, parent, p);
		rb_insert_color(&nskb->rbnode, &q->t_root);
	}
	sch->q.qlen++;
}
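
/* The common case (little or no jitter) appends in O(1) to the
 * t_head/t_tail linear list; only packets whose send time lands before
 * the current tail fall back to the O(log n) rbtree insertion.
 */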

/* netem can't properly corrupt a megapacket (like we get from GSO), so
 * when we statistically choose to corrupt one, we segment it instead,
 * returning the first packet to be corrupted, and re-enqueue the remaining
 * frames
 */
static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch,
				     struct sk_buff **to_free)
{
	struct sk_buff *segs;
	netdev_features_t features = netif_skb_features(skb);

	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);

	if (IS_ERR_OR_NULL(segs)) {
		qdisc_drop(skb, sch, to_free);
		return NULL;
	}
	consume_skb(skb);
	return segs;
}

/*
 * Insert one skb into qdisc.
 * Note: parent depends on return value to account for queue length.
 *	NET_XMIT_DROP: queue length didn't change.
 *	NET_XMIT_SUCCESS: one skb was queued.
 */
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
			 struct sk_buff **to_free)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	/* We don't fill cb now as skb_unshare() may invalidate it */
	struct netem_skb_cb *cb;
	struct sk_buff *skb2;
	struct sk_buff *segs = NULL;
	unsigned int prev_len = qdisc_pkt_len(skb);
	int count = 1;
	int rc = NET_XMIT_SUCCESS;
	int rc_drop = NET_XMIT_DROP;

	/* Do not fool qdisc_drop_all() */
	skb->prev = NULL;

	/* Random duplication */
	if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
		++count;

	/* Drop packet? */
	if (loss_event(q)) {
		if (q->ecn && INET_ECN_set_ce(skb))
			qdisc_qstats_drop(sch); /* mark packet */
		else
			--count;
	}
	if (count == 0) {
		qdisc_qstats_drop(sch);
		__qdisc_drop(skb, to_free);
		return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
	}
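
	/* Note: the __NET_XMIT_BYPASS flag just above frees the skb while
	 * still reporting success upward, so an emulated loss is not
	 * mistaken for local congestion by the transmitting stack.
	 */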

	/* If a delay is expected, orphan the skb. (orphaning usually takes
	 * place at TX completion time, so _before_ the link transit delay)
	 */
	if (q->latency || q->jitter || q->rate)
		skb_orphan_partial(skb);

	/*
	 * If we need to duplicate packet, then re-insert at top of the
	 * qdisc tree, since parent queuer expects that only one
	 * skb will be queued.
	 */
	if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
		struct Qdisc *rootq = qdisc_root_bh(sch);
		u32 dupsave = q->duplicate; /* prevent duplicating a dup... */

		q->duplicate = 0;
		rootq->enqueue(skb2, rootq, to_free);
		q->duplicate = dupsave;
		rc_drop = NET_XMIT_SUCCESS;
	}

	/*
	 * Randomized packet corruption.
	 * Make a copy if needed since we are modifying the data.
	 * If the packet is going to be hardware checksummed, then
	 * do it now in software before we mangle it.
	 */
	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
		if (skb_is_gso(skb)) {
			skb = netem_segment(skb, sch, to_free);
			if (!skb)
				return rc_drop;
			segs = skb->next;
			skb_mark_not_on_list(skb);
			qdisc_skb_cb(skb)->pkt_len = skb->len;
		}

		skb = skb_unshare(skb, GFP_ATOMIC);
		if (unlikely(!skb)) {
			qdisc_qstats_drop(sch);
			goto finish_segs;
		}
		if (skb->ip_summed == CHECKSUM_PARTIAL &&
		    skb_checksum_help(skb)) {
			qdisc_drop(skb, sch, to_free);
			skb = NULL;
			goto finish_segs;
		}

		skb->data[get_random_u32_below(skb_headlen(skb))] ^=
			1<<get_random_u32_below(8);
	}

	if (unlikely(sch->q.qlen >= sch->limit)) {
		/* re-link segs, so that qdisc_drop_all() frees them all */
		skb->next = segs;
		qdisc_drop_all(skb, sch, to_free);
		return rc_drop;
	}

	qdisc_qstats_backlog_inc(sch, skb);

	cb = netem_skb_cb(skb);
	if (q->gap == 0 ||		/* not doing reordering */
	    q->counter < q->gap - 1 ||	/* inside last reordering gap */
	    q->reorder < get_crandom(&q->reorder_cor)) {
		u64 now;
		s64 delay;

		delay = tabledist(q->latency, q->jitter,
				  &q->delay_cor, q->delay_dist);

		now = ktime_get_ns();

		if (q->rate) {
			struct netem_skb_cb *last = NULL;

			if (sch->q.tail)
				last = netem_skb_cb(sch->q.tail);
			if (q->t_root.rb_node) {
				struct sk_buff *t_skb;
				struct netem_skb_cb *t_last;

				t_skb = skb_rb_last(&q->t_root);
				t_last = netem_skb_cb(t_skb);
				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}
			if (q->t_tail) {
				struct netem_skb_cb *t_last =
					netem_skb_cb(q->t_tail);

				if (!last ||
				    t_last->time_to_send > last->time_to_send)
					last = t_last;
			}

			if (last) {
				/*
				 * Last packet in queue is reference point (now),
				 * calculate this time bonus and subtract
				 * from delay.
				 */
				delay -= last->time_to_send - now;
				delay = max_t(s64, 0, delay);
				now = last->time_to_send;
			}

			delay += packet_time_ns(qdisc_pkt_len(skb), q);
		}

		cb->time_to_send = now + delay;
		++q->counter;
		tfifo_enqueue(skb, sch);
	} else {
		/*
		 * Do re-ordering by putting one out of N packets at the front
		 * of the queue.
		 */
		cb->time_to_send = ktime_get_ns();
		q->counter = 0;

		__qdisc_enqueue_head(skb, &sch->q);
		sch->qstats.requeues++;
	}
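
	/* Illustrative front end usage (iproute2 syntax): "tc qdisc add dev
	 * eth0 root netem delay 10ms reorder 25% 50%" sends roughly one in
	 * four packets immediately (with 50% correlation) while the rest
	 * take the delayed path above.
	 */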

finish_segs:
	if (segs) {
		unsigned int len, last_len;
		int nb;

		len = skb ? skb->len : 0;
		nb = skb ? 1 : 0;

		while (segs) {
			skb2 = segs->next;
			skb_mark_not_on_list(segs);
			qdisc_skb_cb(segs)->pkt_len = segs->len;
			last_len = segs->len;
			rc = qdisc_enqueue(segs, sch, to_free);
			if (rc != NET_XMIT_SUCCESS) {
				if (net_xmit_drop_count(rc))
					qdisc_qstats_drop(sch);
			} else {
				nb++;
				len += last_len;
			}
			segs = skb2;
		}
		/* Parent qdiscs accounted for 1 skb of size @prev_len */
		qdisc_tree_reduce_backlog(sch, -(nb - 1), -(len - prev_len));
	} else if (!skb) {
		return NET_XMIT_DROP;
	}
	return NET_XMIT_SUCCESS;
}

/* Delay the start of the next slot by choosing a new future slot time,
 * refreshing its budget of bytes and packets.
 */
static void get_slot_next(struct netem_sched_data *q, u64 now)
{
	s64 next_delay;

	if (!q->slot_dist)
		next_delay = q->slot_config.min_delay +
				(get_random_u32() *
				 (q->slot_config.max_delay -
				  q->slot_config.min_delay) >> 32);
	else
		next_delay = tabledist(q->slot_config.dist_delay,
				       (s32)(q->slot_config.dist_jitter),
				       NULL, q->slot_dist);

	q->slot.slot_next = now + next_delay;
	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
}
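
/* Slots model bursty link layers such as Wi-Fi or cellular: nothing is
 * released between slots, then up to max_packets/max_bytes may go out at
 * once. Illustrative front end usage (iproute2 syntax, numbers invented):
 *
 *	tc qdisc add dev eth0 root netem slot 800us 1ms packets 32 bytes 64k
 */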

static struct sk_buff *netem_peek(struct netem_sched_data *q)
{
	struct sk_buff *skb = skb_rb_first(&q->t_root);
	u64 t1, t2;

	if (!skb)
		return q->t_head;
	if (!q->t_head)
		return skb;

	t1 = netem_skb_cb(skb)->time_to_send;
	t2 = netem_skb_cb(q->t_head)->time_to_send;
	if (t1 < t2)
		return skb;
	return q->t_head;
}

static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
{
	if (skb == q->t_head) {
		q->t_head = skb->next;
		if (!q->t_head)
			q->t_tail = NULL;
	} else {
		rb_erase(&skb->rbnode, &q->t_root);
	}
}

static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

tfifo_dequeue:
	skb = __qdisc_dequeue_head(&sch->q);
	if (skb) {
		qdisc_qstats_backlog_dec(sch, skb);
deliver:
		qdisc_bstats_update(sch, skb);
		return skb;
	}
	skb = netem_peek(q);
	if (skb) {
		u64 time_to_send;
		u64 now = ktime_get_ns();

		/* is there still time remaining before release? */
		time_to_send = netem_skb_cb(skb)->time_to_send;
		if (q->slot.slot_next && q->slot.slot_next < time_to_send)
			get_slot_next(q, now);

		if (time_to_send <= now && q->slot.slot_next <= now) {
			netem_erase_head(q, skb);
			sch->q.qlen--;
			qdisc_qstats_backlog_dec(sch, skb);
			skb->next = NULL;
			skb->prev = NULL;
			/* skb->dev shares skb->rbnode area,
			 * we need to restore its value.
			 */
			skb->dev = qdisc_dev(sch);

			if (q->slot.slot_next) {
				q->slot.packets_left--;
				q->slot.bytes_left -= qdisc_pkt_len(skb);
				if (q->slot.packets_left <= 0 ||
				    q->slot.bytes_left <= 0)
					get_slot_next(q, now);
			}

			if (q->qdisc) {
				unsigned int pkt_len = qdisc_pkt_len(skb);
				struct sk_buff *to_free = NULL;
				int err;

				err = qdisc_enqueue(skb, q->qdisc, &to_free);
				kfree_skb_list(to_free);
				if (err != NET_XMIT_SUCCESS &&
				    net_xmit_drop_count(err)) {
					qdisc_qstats_drop(sch);
					qdisc_tree_reduce_backlog(sch, 1,
								  pkt_len);
				}
				goto tfifo_dequeue;
			}
			goto deliver;
		}

		if (q->qdisc) {
			skb = q->qdisc->ops->dequeue(q->qdisc);
			if (skb)
				goto deliver;
		}

		qdisc_watchdog_schedule_ns(&q->watchdog,
					   max(time_to_send,
					       q->slot.slot_next));
	}

	if (q->qdisc) {
		skb = q->qdisc->ops->dequeue(q->qdisc);
		if (skb)
			goto deliver;
	}
	return NULL;
}
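
/* When the head packet is not yet due, netem_dequeue() arms the qdisc
 * watchdog (an hrtimer) for max(time_to_send, slot_next), so the queue
 * is re-polled exactly when the next packet becomes eligible.
 */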

static void netem_reset(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_reset_queue(sch);
	tfifo_reset(sch);
	if (q->qdisc)
		qdisc_reset(q->qdisc);
	qdisc_watchdog_cancel(&q->watchdog);
}

static void dist_free(struct disttable *d)
{
	kvfree(d);
}

/*
 * Distribution data is a variable size payload containing
 * signed 16 bit values.
 */

static int get_dist_table(struct disttable **tbl, const struct nlattr *attr)
{
	size_t n = nla_len(attr)/sizeof(__s16);
	const __s16 *data = nla_data(attr);
	struct disttable *d;
	int i;

	if (!n || n > NETEM_DIST_MAX)
		return -EINVAL;

	d = kvmalloc(struct_size(d, table, n), GFP_KERNEL);
	if (!d)
		return -ENOMEM;

	d->size = n;
	for (i = 0; i < n; i++)
		d->table[i] = data[i];

	*tbl = d;
	return 0;
}
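
/* On the userspace side, iproute2 ships pre-computed inverse-CDF tables
 * (normal, pareto, paretonormal) generated by its maketable tool; a
 * command such as "tc qdisc add dev eth0 root netem delay 100ms 20ms
 * distribution normal" delivers one of them through this attribute.
 */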

static void get_slot(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_slot *c = nla_data(attr);

	q->slot_config = *c;
	if (q->slot_config.max_packets == 0)
		q->slot_config.max_packets = INT_MAX;
	if (q->slot_config.max_bytes == 0)
		q->slot_config.max_bytes = INT_MAX;

	/* capping dist_jitter to the range acceptable by tabledist() */
	q->slot_config.dist_jitter = min_t(__s64, INT_MAX, abs(q->slot_config.dist_jitter));

	q->slot.packets_left = q->slot_config.max_packets;
	q->slot.bytes_left = q->slot_config.max_bytes;
	if (q->slot_config.min_delay | q->slot_config.max_delay |
	    q->slot_config.dist_jitter)
		q->slot.slot_next = ktime_get_ns();
	else
		q->slot.slot_next = 0;
}

static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corr *c = nla_data(attr);

	init_crandom(&q->delay_cor, c->delay_corr);
	init_crandom(&q->loss_cor, c->loss_corr);
	init_crandom(&q->dup_cor, c->dup_corr);
}

static void get_reorder(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_reorder *r = nla_data(attr);

	q->reorder = r->probability;
	init_crandom(&q->reorder_cor, r->correlation);
}

static void get_corrupt(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_corrupt *r = nla_data(attr);

	q->corrupt = r->probability;
	init_crandom(&q->corrupt_cor, r->correlation);
}

static void get_rate(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct tc_netem_rate *r = nla_data(attr);

	q->rate = r->rate;
	q->packet_overhead = r->packet_overhead;
	q->cell_size = r->cell_size;
	q->cell_overhead = r->cell_overhead;
	if (q->cell_size)
		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
	else
		q->cell_size_reciprocal = (struct reciprocal_value) { 0 };
}

static int get_loss_clg(struct netem_sched_data *q, const struct nlattr *attr)
{
	const struct nlattr *la;
	int rem;

	nla_for_each_nested(la, attr, rem) {
		u16 type = nla_type(la);

		switch (type) {
		case NETEM_LOSS_GI: {
			const struct tc_netem_gimodel *gi = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gimodel)) {
				pr_info("netem: incorrect gi model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_4_STATES;

			q->clg.state = TX_IN_GAP_PERIOD;
			q->clg.a1 = gi->p13;
			q->clg.a2 = gi->p31;
			q->clg.a3 = gi->p32;
			q->clg.a4 = gi->p14;
			q->clg.a5 = gi->p23;
			break;
		}

		case NETEM_LOSS_GE: {
			const struct tc_netem_gemodel *ge = nla_data(la);

			if (nla_len(la) < sizeof(struct tc_netem_gemodel)) {
				pr_info("netem: incorrect ge model size\n");
				return -EINVAL;
			}

			q->loss_model = CLG_GILB_ELL;
			q->clg.state = GOOD_STATE;
			q->clg.a1 = ge->p;
			q->clg.a2 = ge->r;
			q->clg.a3 = ge->h;
			q->clg.a4 = ge->k1;
			break;
		}

		default:
			pr_info("netem: unknown loss type %u\n", type);
			return -EINVAL;
		}
	}

	return 0;
}

static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
	[TCA_NETEM_ECN]		= { .type = NLA_U32 },
	[TCA_NETEM_RATE64]	= { .type = NLA_U64 },
	[TCA_NETEM_LATENCY64]	= { .type = NLA_S64 },
	[TCA_NETEM_JITTER64]	= { .type = NLA_S64 },
	[TCA_NETEM_SLOT]	= { .len = sizeof(struct tc_netem_slot) },
};

static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
		      const struct nla_policy *policy, int len)
{
	int nested_len = nla_len(nla) - NLA_ALIGN(len);

	if (nested_len < 0) {
		pr_info("netem: invalid attributes len %d\n", nested_len);
		return -EINVAL;
	}

	if (nested_len >= nla_attr_size(0))
		return nla_parse_deprecated(tb, maxtype,
					    nla_data(nla) + NLA_ALIGN(len),
					    nested_len, policy, NULL);

	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
	return 0;
}

/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_NETEM_MAX + 1];
	struct disttable *delay_dist = NULL;
	struct disttable *slot_dist = NULL;
	struct tc_netem_qopt *qopt;
	struct clgstate old_clg;
	int old_loss_model = CLG_RANDOM;
	int ret;

	qopt = nla_data(opt);
	ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
	if (ret < 0)
		return ret;

	if (tb[TCA_NETEM_DELAY_DIST]) {
		ret = get_dist_table(&delay_dist, tb[TCA_NETEM_DELAY_DIST]);
		if (ret)
			goto table_free;
	}

	if (tb[TCA_NETEM_SLOT_DIST]) {
		ret = get_dist_table(&slot_dist, tb[TCA_NETEM_SLOT_DIST]);
		if (ret)
			goto table_free;
	}

	sch_tree_lock(sch);
	/* backup q->clg and q->loss_model */
	old_clg = q->clg;
	old_loss_model = q->loss_model;

	if (tb[TCA_NETEM_LOSS]) {
		ret = get_loss_clg(q, tb[TCA_NETEM_LOSS]);
		if (ret) {
			q->loss_model = old_loss_model;
			q->clg = old_clg;
			goto unlock;
		}
	} else {
		q->loss_model = CLG_RANDOM;
		memset(&q->clg, 0, sizeof(q->clg));
	}

	if (delay_dist)
		swap(q->delay_dist, delay_dist);
	if (slot_dist)
		swap(q->slot_dist, slot_dist);
	sch->limit = qopt->limit;

	q->latency = PSCHED_TICKS2NS(qopt->latency);
	q->jitter = PSCHED_TICKS2NS(qopt->jitter);
	q->limit = qopt->limit;
	q->gap = qopt->gap;
	q->counter = 0;
	q->loss = qopt->loss;
	q->duplicate = qopt->duplicate;

	/* for compatibility with earlier versions:
	 * if gap is set, need to assume 100% probability
	 */
	if (q->gap)
		q->reorder = ~0;

	if (tb[TCA_NETEM_CORR])
		get_correlation(q, tb[TCA_NETEM_CORR]);

	if (tb[TCA_NETEM_REORDER])
		get_reorder(q, tb[TCA_NETEM_REORDER]);

	if (tb[TCA_NETEM_CORRUPT])
		get_corrupt(q, tb[TCA_NETEM_CORRUPT]);

	if (tb[TCA_NETEM_RATE])
		get_rate(q, tb[TCA_NETEM_RATE]);

	if (tb[TCA_NETEM_RATE64])
		q->rate = max_t(u64, q->rate,
				nla_get_u64(tb[TCA_NETEM_RATE64]));

	if (tb[TCA_NETEM_LATENCY64])
		q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]);

	if (tb[TCA_NETEM_JITTER64])
		q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]);

	if (tb[TCA_NETEM_ECN])
		q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]);

	if (tb[TCA_NETEM_SLOT])
		get_slot(q, tb[TCA_NETEM_SLOT]);

	/* capping jitter to the range acceptable by tabledist() */
	q->jitter = min_t(s64, abs(q->jitter), INT_MAX);

unlock:
	sch_tree_unlock(sch);

table_free:
	dist_free(delay_dist);
	dist_free(slot_dist);
	return ret;
}
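
/* Note the swap() pattern above: the previous distribution tables end up
 * in the local delay_dist/slot_dist variables, so the dist_free() calls
 * at table_free release them outside the qdisc tree lock; the same path
 * also frees freshly allocated tables on error.
 */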

static int netem_init(struct Qdisc *sch, struct nlattr *opt,
		      struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);
	int ret;

	qdisc_watchdog_init(&q->watchdog, sch);

	if (!opt)
		return -EINVAL;

	q->loss_model = CLG_RANDOM;
	ret = netem_change(sch, opt, extack);
	if (ret)
		pr_info("netem: change failed\n");
	return ret;
}

static void netem_destroy(struct Qdisc *sch)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);
	if (q->qdisc)
		qdisc_put(q->qdisc);
	dist_free(q->delay_dist);
	dist_free(q->slot_dist);
}

static int dump_loss_model(const struct netem_sched_data *q,
			   struct sk_buff *skb)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, TCA_NETEM_LOSS);
	if (nest == NULL)
		goto nla_put_failure;

	switch (q->loss_model) {
	case CLG_RANDOM:
		/* legacy loss model */
		nla_nest_cancel(skb, nest);
		return 0;	/* no data */

	case CLG_4_STATES: {
		struct tc_netem_gimodel gi = {
			.p13 = q->clg.a1,
			.p31 = q->clg.a2,
			.p32 = q->clg.a3,
			.p14 = q->clg.a4,
			.p23 = q->clg.a5,
		};

		if (nla_put(skb, NETEM_LOSS_GI, sizeof(gi), &gi))
			goto nla_put_failure;
		break;
	}
	case CLG_GILB_ELL: {
		struct tc_netem_gemodel ge = {
			.p = q->clg.a1,
			.r = q->clg.a2,
			.h = q->clg.a3,
			.k1 = q->clg.a4,
		};

		if (nla_put(skb, NETEM_LOSS_GE, sizeof(ge), &ge))
			goto nla_put_failure;
		break;
	}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	const struct netem_sched_data *q = qdisc_priv(sch);
	struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
	struct tc_netem_qopt qopt;
	struct tc_netem_corr cor;
	struct tc_netem_reorder reorder;
	struct tc_netem_corrupt corrupt;
	struct tc_netem_rate rate;
	struct tc_netem_slot slot;

	qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
			     UINT_MAX);
	qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
			    UINT_MAX);
	qopt.limit = q->limit;
	qopt.loss = q->loss;
	qopt.gap = q->gap;
	qopt.duplicate = q->duplicate;
	if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency))
		goto nla_put_failure;

	if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter))
		goto nla_put_failure;

	cor.delay_corr = q->delay_cor.rho;
	cor.loss_corr = q->loss_cor.rho;
	cor.dup_corr = q->dup_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORR, sizeof(cor), &cor))
		goto nla_put_failure;

	reorder.probability = q->reorder;
	reorder.correlation = q->reorder_cor.rho;
	if (nla_put(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder))
		goto nla_put_failure;

	corrupt.probability = q->corrupt;
	corrupt.correlation = q->corrupt_cor.rho;
	if (nla_put(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt))
		goto nla_put_failure;

	if (q->rate >= (1ULL << 32)) {
		if (nla_put_u64_64bit(skb, TCA_NETEM_RATE64, q->rate,
				      TCA_NETEM_PAD))
			goto nla_put_failure;
		rate.rate = ~0U;
	} else {
		rate.rate = q->rate;
	}
	rate.packet_overhead = q->packet_overhead;
	rate.cell_size = q->cell_size;
	rate.cell_overhead = q->cell_overhead;
	if (nla_put(skb, TCA_NETEM_RATE, sizeof(rate), &rate))
		goto nla_put_failure;

	if (q->ecn && nla_put_u32(skb, TCA_NETEM_ECN, q->ecn))
		goto nla_put_failure;

	if (dump_loss_model(q, skb) != 0)
		goto nla_put_failure;

	if (q->slot_config.min_delay | q->slot_config.max_delay |
	    q->slot_config.dist_jitter) {
		slot = q->slot_config;
		if (slot.max_packets == INT_MAX)
			slot.max_packets = 0;
		if (slot.max_bytes == INT_MAX)
			slot.max_bytes = 0;
		if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot))
			goto nla_put_failure;
	}

	return nla_nest_end(skb, nla);

nla_put_failure:
	nlmsg_trim(skb, nla);
	return -1;
}
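
/* The dump keeps old and new tc binaries working: the legacy qopt fields
 * are clamped to 32-bit psched ticks, while TCA_NETEM_LATENCY64 and
 * TCA_NETEM_JITTER64 carry the full nanosecond values alongside them.
 */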

static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
			    struct sk_buff *skb, struct tcmsg *tcm)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	if (cl != 1 || !q->qdisc)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		       struct Qdisc **old, struct netlink_ext_ack *extack)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	*old = qdisc_replace(sch, new, &q->qdisc);
	return 0;
}

static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct netem_sched_data *q = qdisc_priv(sch);

	return q->qdisc;
}

static unsigned long netem_find(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (!tc_qdisc_stats_dump(sch, 1, walker))
			return;
	}
}

static const struct Qdisc_class_ops netem_class_ops = {
	.graft		=	netem_graft,
	.leaf		=	netem_leaf,
	.find		=	netem_find,
	.walk		=	netem_walk,
	.dump		=	netem_dump_class,
};

static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
	.id		=	"netem",
	.cl_ops		=	&netem_class_ops,
	.priv_size	=	sizeof(struct netem_sched_data),
	.enqueue	=	netem_enqueue,
	.dequeue	=	netem_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.init		=	netem_init,
	.reset		=	netem_reset,
	.destroy	=	netem_destroy,
	.change		=	netem_change,
	.dump		=	netem_dump,
	.owner		=	THIS_MODULE,
};

static int __init netem_module_init(void)
{
	pr_info("netem: version " VERSION "\n");
	return register_qdisc(&netem_qdisc_ops);
}

static void __exit netem_module_exit(void)
{
	unregister_qdisc(&netem_qdisc_ops);
}

module_init(netem_module_init)
module_exit(netem_module_exit)
MODULE_LICENSE("GPL");