Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 22cd46a600576f286803536d45875cd9d537cdca..75046ec7214449c631c38eaab5e4a51644cfa0e5 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -77,6 +77,10 @@ struct netem_sched_data {
        /* internal t(ime)fifo qdisc uses t_root and sch->limit */
        struct rb_root t_root;
 
+       /* a linear queue; reduces rbtree rebalancing when jitter is low */
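+       /* invariant: t_head..t_tail is kept sorted by time_to_send */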
+       struct sk_buff  *t_head;
+       struct sk_buff  *t_tail;
+
        /* optional qdisc for classful handling (NULL at netem init) */
        struct Qdisc    *qdisc;
 
@@ -369,26 +373,39 @@ static void tfifo_reset(struct Qdisc *sch)
                rb_erase(&skb->rbnode, &q->t_root);
                rtnl_kfree_skbs(skb, skb);
        }
+
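+       /* free the whole t_head..t_tail chain in one call */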
+       rtnl_kfree_skbs(q->t_head, q->t_tail);
+       q->t_head = NULL;
+       q->t_tail = NULL;
 }
 
 static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
 {
        struct netem_sched_data *q = qdisc_priv(sch);
        u64 tnext = netem_skb_cb(nskb)->time_to_send;
-       struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
 
-       while (*p) {
-               struct sk_buff *skb;
-
-               parent = *p;
-               skb = rb_to_skb(parent);
-               if (tnext >= netem_skb_cb(skb)->time_to_send)
-                       p = &parent->rb_right;
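+       /* common case (low jitter): nskb sorts at or after the current
+        * tail, so append it to the linear list in O(1)
+        */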
+       if (!q->t_tail || tnext >= netem_skb_cb(q->t_tail)->time_to_send) {
+               if (q->t_tail)
+                       q->t_tail->next = nskb;
                else
-                       p = &parent->rb_left;
+                       q->t_head = nskb;
+               q->t_tail = nskb;
+       } else {
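+               /* out-of-order arrival: insert into the rbtree instead */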
+               struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
+
+               while (*p) {
+                       struct sk_buff *skb;
+
+                       parent = *p;
+                       skb = rb_to_skb(parent);
+                       if (tnext >= netem_skb_cb(skb)->time_to_send)
+                               p = &parent->rb_right;
+                       else
+                               p = &parent->rb_left;
+               }
+               rb_link_node(&nskb->rbnode, parent, p);
+               rb_insert_color(&nskb->rbnode, &q->t_root);
        }
-       rb_link_node(&nskb->rbnode, parent, p);
-       rb_insert_color(&nskb->rbnode, &q->t_root);
        sch->q.qlen++;
 }
 
@@ -533,9 +550,16 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                                t_skb = skb_rb_last(&q->t_root);
                                t_last = netem_skb_cb(t_skb);
                                if (!last ||
-                                   t_last->time_to_send > last->time_to_send) {
+                                   t_last->time_to_send > last->time_to_send)
+                                       last = t_last;
+                       }
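+                       /* the newest packet may sit on the linear list
+                        * rather than in the rbtree; check its tail too
+                        */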
+                       if (q->t_tail) {
+                               struct netem_skb_cb *t_last =
+                                       netem_skb_cb(q->t_tail);
+
+                               if (!last ||
+                                   t_last->time_to_send > last->time_to_send)
                                        last = t_last;
-                               }
                        }
 
                        if (last) {
@@ -614,11 +638,38 @@ static void get_slot_next(struct netem_sched_data *q, u64 now)
        q->slot.bytes_left = q->slot_config.max_bytes;
 }
 
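+/* return the packet with the earliest time_to_send, from either the
+ * rbtree or the linear list, without removing it
+ */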
+static struct sk_buff *netem_peek(struct netem_sched_data *q)
+{
+       struct sk_buff *skb = skb_rb_first(&q->t_root);
+       u64 t1, t2;
+
+       if (!skb)
+               return q->t_head;
+       if (!q->t_head)
+               return skb;
+
+       t1 = netem_skb_cb(skb)->time_to_send;
+       t2 = netem_skb_cb(q->t_head)->time_to_send;
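+       /* on equal send times, favor the linear list head */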
+       if (t1 < t2)
+               return skb;
+       return q->t_head;
+}
+
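+/* remove @skb, which must be the packet last returned by netem_peek(),
+ * from whichever structure currently holds it
+ */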
+static void netem_erase_head(struct netem_sched_data *q, struct sk_buff *skb)
+{
+       if (skb == q->t_head) {
+               q->t_head = skb->next;
+               if (!q->t_head)
+                       q->t_tail = NULL;
+       } else {
+               rb_erase(&skb->rbnode, &q->t_root);
+       }
+}
+
 static struct sk_buff *netem_dequeue(struct Qdisc *sch)
 {
        struct netem_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
-       struct rb_node *p;
 
 tfifo_dequeue:
        skb = __qdisc_dequeue_head(&sch->q);
@@ -628,20 +679,18 @@ deliver:
                qdisc_bstats_update(sch, skb);
                return skb;
        }
-       p = rb_first(&q->t_root);
-       if (p) {
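+       /* pick the earliest pending packet from both structures */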
+       skb = netem_peek(q);
+       if (skb) {
                u64 time_to_send;
                u64 now = ktime_get_ns();
 
-               skb = rb_to_skb(p);
-
                /* is there still time remaining before this skb is due? */
                time_to_send = netem_skb_cb(skb)->time_to_send;
                if (q->slot.slot_next && q->slot.slot_next < time_to_send)
                        get_slot_next(q, now);
 
-               if (time_to_send <= now &&  q->slot.slot_next <= now) {
-                       rb_erase(p, &q->t_root);
+               if (time_to_send <= now && q->slot.slot_next <= now) {
+                       netem_erase_head(q, skb);
                        sch->q.qlen--;
                        qdisc_qstats_backlog_dec(sch, skb);
                        skb->next = NULL;