/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 *
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f - t_i).

	Packetized version of this definition:
	the sequence of packets of sizes s_i served at moments t_i
	obeys TBF if for any i <= k:

	s_i + ... + s_k <= B + R*(t_k - t_i)

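	For example (figures are ours, purely illustrative): with
	R = 1 Mbit/s and B = 10 kbit, any 2 ms window may carry at most
	10k + 1M*0.002 = 12 kbit; a burst of B bits may leave at once,
	after which the flow is held to R on average.
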
	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.

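	A worked instance (figures are ours, purely illustrative): with
	B/R = 10 ms, an idle bucket holds N = 10 ms of credit.  A packet
	with S/R = 6 ms may go out at once, leaving N = 4 ms; the next
	such packet must wait 2 ms for N to regrow to 6 ms, and once the
	stored credit is spent, packets pace out every S/R = 6 ms, i.e.
	at rate R.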

	Actually, QoS requires two TBFs to be applied to a data stream.
	One of them controls the steady-state burst size, while the other,
	with rate P (peak rate) and depth M (equal to link MTU),
	limits bursts at a smaller time scale.

	It is easy to see that P > R and B > M. If P is infinite, this
	double TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max((L-B)/R, (L-M)/P)

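	For example (our figures, for illustration only): for a backlog of
	L = 100 kbit with B = 10 kbit, R = 1 Mbit/s, M = 12 kbit and
	P = 10 Mbit/s, lat = max(90k/1M, 88k/10M) = max(90 ms, 8.8 ms),
	so the steady-state bucket dominates and the last bit leaves
	after roughly 90 ms.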

	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not woken up by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means that with depth B, the maximal rate is

	R_crit = B*HZ

	E.g. for 10 Mbit ethernet and HZ=100 the minimal allowed B is ~10 Kbytes.

	Note that the peak-rate TBF is much tougher: with MTU 1500 and
	HZ=100, P_crit = 150 Kbytes/sec. So, if you need greater peak
	rates, use Alpha with HZ=1000 :-)

	With classful TBF, limit is just kept for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed the limit is not effective anymore.
*/
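
/*
 * A minimal sketch of the dual-bucket arithmetic described above, kept
 * out of the build under #if 0.  It mirrors the core of tbf_dequeue()
 * below, assuming tokens and costs are expressed in the same time units
 * (as they are with psched ticks); the names are ours and illustrative
 * only, not part of the qdisc.
 */
#if 0
struct tbf_sketch {
	long	buffer;		/* B/R: main bucket depth, in time units */
	long	mtu;		/* M/P: peak bucket depth, in time units */
	long	tokens;		/* current B tokens */
	long	ptokens;	/* current P tokens */
	long	t_c;		/* last checkpoint */
};

/* May a packet costing r_cost (len/R) and p_cost (len/P) leave at 'now'? */
static int tbf_sketch_conforms(struct tbf_sketch *s, long now,
			       long r_cost, long p_cost)
{
	long elapsed = now - s->t_c;
	long toks = min(elapsed + s->tokens, s->buffer) - r_cost;
	long ptoks = min(elapsed + s->ptokens, s->mtu) - p_cost;

	if ((toks | ptoks) < 0)
		return 0;	/* caller sleeps for max(-toks, -ptoks) */

	s->t_c = now;
	s->tokens = toks;
	s->ptokens = ptoks;
	return 1;
}
#endif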

struct tbf_sched_data
{
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	u32		mtu;
	u32		max_size;
	struct qdisc_rate_table	*R_tab;
	struct qdisc_rate_table	*P_tab;

/* Variables */
	long	tokens;			/* Current number of B tokens */
	long	ptokens;		/* Current number of P tokens */
	psched_time_t	t_c;		/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};

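/* L2T maps a packet length to its token cost, i.e. the time needed to
 * transmit it at rate R (or P, for L2T_P), as precomputed in the rate
 * table. */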
#define L2T(q,L)   qdisc_l2t((q)->R_tab,L)
#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)

static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

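	/* A packet larger than max_size could never accumulate enough
	 * tokens to be sent, so drop it up front via qdisc_reshape_fail()
	 * rather than let it block the head of the queue forever. */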
	if (qdisc_pkt_len(skb) > q->max_size)
		return qdisc_reshape_fail(skb, sch);

	ret = qdisc_enqueue(skb, q->qdisc);
	if (ret != 0) {
		if (net_xmit_drop_count(ret))
			sch->qstats.drops++;
		return ret;
	}

	sch->q.qlen++;
	sch->bstats.bytes += qdisc_pkt_len(skb);
	sch->bstats.packets++;
	return 0;
}

static unsigned int tbf_drop(struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->ops->peek(q->qdisc);

	if (skb) {
		psched_time_t now;
		long toks;
		long ptoks = 0;
		unsigned int len = qdisc_pkt_len(skb);

		now = psched_get_time();
		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);

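		/* Replenish both buckets for the time elapsed since the
		 * last checkpoint, clip at the bucket depth, then charge
		 * the head packet's transmission time against each. */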
		if (q->P_tab) {
			ptoks = toks + q->ptokens;
			if (ptoks > (long)q->mtu)
				ptoks = q->mtu;
			ptoks -= L2T_P(q, len);
		}
		toks += q->tokens;
		if (toks > (long)q->buffer)
			toks = q->buffer;
		toks -= L2T(q, len);

		if ((toks|ptoks) >= 0) {
			skb = qdisc_dequeue_peeked(q->qdisc);
			if (unlikely(!skb))
				return NULL;

			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		qdisc_watchdog_schedule(&q->watchdog,
					now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which could be sent now.  Tempting as that sounds,
		   it is wrong in principle: we MUST NOT reorder packets
		   under these circumstances.

		   If we split the flow into independent subflows,
		   per-subflow dequeue would be a very good solution;
		   that is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC).
		 */

		sch->qstats.overlimits++;
	}
	return NULL;
}

static void tbf_reset(struct Qdisc* sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	q->t_c = psched_get_time();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab = NULL;
	struct qdisc_rate_table *ptab = NULL;
	struct Qdisc *child = NULL;
	int max_size, n;

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

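	/* rtab->data[n] is the transmission time of a packet in the n-th
	 * size cell; max_size is thus the largest packet whose token cost
	 * still fits into the configured buffer (and, for the peak bucket,
	 * into mtu). */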
	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer)
			break;
	max_size = (n << qopt->rate.cell_log) - 1;
	if (ptab) {
		int size;

		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu)
				break;
		size = (n << qopt->peakrate.cell_log) - 1;
		if (size < max_size)
			max_size = size;
	}
	if (max_size < 0)
		goto done;

	if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(q->qdisc);
		q->qdisc = child;
	}
	q->limit = qopt->limit;
	q->mtu = qopt->mtu;
	q->max_size = max_size;
	q->buffer = qopt->buffer;
	q->tokens = q->buffer;
	q->ptokens = q->mtu;

	swap(q->R_tab, rtab);
	swap(q->P_tab, ptab);

	sch_tree_unlock(sch);
	err = 0;
done:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);
	return err;
}

static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (opt == NULL)
		return -EINVAL;

	q->t_c = psched_get_time();
	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	return tbf_change(sch, opt);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);

	if (q->P_tab)
		qdisc_put_rtab(q->P_tab);
	if (q->R_tab)
		qdisc_put_rtab(q->R_tab);

	qdisc_destroy(q->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	opt.rate = q->R_tab->rate;
	if (q->P_tab)
		opt.peakrate = q->P_tab->rate;
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = q->mtu;
	opt.buffer = q->buffer;
	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (cl != 1)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = q->qdisc;
	q->qdisc = new;
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int tbf_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}

static const struct Qdisc_class_ops tbf_class_ops =
{
	.graft		=	tbf_graft,
	.leaf		=	tbf_leaf,
	.get		=	tbf_get,
	.put		=	tbf_put,
	.change		=	tbf_change_class,
	.delete		=	tbf_delete,
	.walk		=	tbf_walk,
	.tcf_chain	=	tbf_find_tcf,
	.dump		=	tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		=	NULL,
	.cl_ops		=	&tbf_class_ops,
	.id		=	"tbf",
	.priv_size	=	sizeof(struct tbf_sched_data),
	.enqueue	=	tbf_enqueue,
	.dequeue	=	tbf_dequeue,
	.peek		=	qdisc_peek_dequeued,
	.drop		=	tbf_drop,
	.init		=	tbf_init,
	.reset		=	tbf_reset,
	.destroy	=	tbf_destroy,
	.change		=	tbf_change,
	.dump		=	tbf_dump,
	.owner		=	THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}

module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");