net_sched: Add qdisc_enqueue wrapper
[linux-2.6-block.git] net/sched/sch_tbf.c
/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *		original idea by Martin Devera
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/* Simple Token Bucket Filter.
   =======================================

   SOURCE.
   -------

   None.

   Description.
   ------------

   A data flow obeys TBF with rate R and depth B, if for any
   time interval t_i...t_f the number of transmitted bits
   does not exceed B + R*(t_f-t_i).

   Packetized version of this definition:
   The sequence of packets of sizes s_i served at moments t_i
   obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)
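
   E.g. (illustrative numbers only): with B = 3000 bytes and
   R = 1000 bytes/sec, two back-to-back 1500-byte packets may be
   sent at t = 0, since 3000 <= 3000; a third may not leave before
   t = 1.5 sec, when 4500 <= 3000 + 1000*1.5 first holds.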

   Algorithm.
   ----------

   Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

   If the first packet in the queue has length S, it may be
   transmitted only at the time t_* when S/R <= N(t_*),
   and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.
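
   Continuing the example above: N starts at B/R = 3 sec of tokens.
   Sending the first two 1500-byte packets drops N from 3 to 0;
   the third must wait until N has grown back to S/R = 1.5 sec.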


   Actually, QoS requires two TBFs to be applied to a data stream.
   One of them controls the steady-state burst size, while another
   one with rate P (peak rate) and depth M (equal to the link MTU)
   limits bursts at a smaller time scale.

   It is easy to see that P>R and B>M. If P is infinity, this double
   TBF is equivalent to a single one.

   When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)
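
   For illustration, with the queue limit L = 60 Kbytes, B = 10 Kbytes,
   R = 125 Kbytes/sec (1Mbit) and no peak rate (P = infinity):
   lat = (60-10)/125 = 0.4 sec.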


   NOTES.
   ------

   If TBF throttles, it starts a watchdog timer, which will wake it up
   when it is ready to transmit.
   Note that the minimal timer resolution is 1/HZ.
   If no new packets arrive during this period,
   or if the device is not awakened by an EOI for some previous packet,
   TBF can stop its activity for 1/HZ.


   This means that, with depth B, the maximal rate is

	R_crit = B*HZ

   E.g. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

   Note that the peak rate TBF is much tougher: with MTU 1500
   P_crit = 150Kbytes/sec. So, if you need greater peak
   rates, use Alpha with HZ=1000 :-)

   With classful TBF, limit is kept only for backwards compatibility.
   It is passed to the default bfifo qdisc - if the inner qdisc is
   changed, the limit is no longer effective.
*/
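
/* The check below is a minimal sketch of the dual-bucket admission
 * test that tbf_dequeue() implements, recast as self-contained C
 * purely for illustration. The names tb_state and tb_may_send are
 * hypothetical and exist nowhere in this file or the kernel API;
 * as in the real code, all quantities are in time units (sizes
 * already divided by the rate). Wrapped in #if 0 so it is never
 * compiled.
 */
#if 0
struct tb_state {
	long buffer;	/* main bucket depth, B/R */
	long mtu;	/* peak bucket depth, M/P */
	long tokens;	/* current main-bucket fill */
	long ptokens;	/* current peak-bucket fill */
	long t_c;	/* time of the last update */
	int has_peak;	/* 0 means P is infinite: a single bucket */
};

static int tb_may_send(struct tb_state *q, long now, long cost, long pcost)
{
	long toks = now - q->t_c;	/* tokens earned since last send */
	long ptoks = 0;

	if (q->has_peak) {
		ptoks = toks + q->ptokens;
		if (ptoks > q->mtu)
			ptoks = q->mtu;	/* peak bucket saturates at M/P */
		ptoks -= pcost;
	}
	toks += q->tokens;
	if (toks > q->buffer)
		toks = q->buffer;	/* main bucket saturates at B/R */
	toks -= cost;

	/* Send only if neither bucket would go negative. */
	if ((toks | ptoks) >= 0) {
		q->t_c = now;
		q->tokens = toks;
		q->ptokens = ptoks;
		return 1;
	}
	return 0;	/* caller should throttle for max(-toks, -ptoks) */
}
#endif

/* From userspace the same parameters are set via tc; a typical
 * invocation (values illustrative) is:
 *
 *	tc qdisc add dev eth0 root tbf rate 1mbit burst 10kb latency 70ms
 */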

struct tbf_sched_data
{
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	u32		mtu;
	u32		max_size;
	struct qdisc_rate_table	*R_tab;
	struct qdisc_rate_table	*P_tab;

/* Variables */
	long	tokens;			/* Current number of B tokens */
	long	ptokens;		/* Current number of P tokens */
	psched_time_t	t_c;		/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};

#define L2T(q,L)	qdisc_l2t((q)->R_tab,L)
#define L2T_P(q,L)	qdisc_l2t((q)->P_tab,L)

static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

	if (skb->len > q->max_size) {
		sch->qstats.drops++;
#ifdef CONFIG_NET_CLS_ACT
		if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
#endif
			kfree_skb(skb);

		return NET_XMIT_DROP;
	}

	ret = qdisc_enqueue(skb, q->qdisc);
	if (ret != 0) {
		sch->qstats.drops++;
		return ret;
	}

	sch->q.qlen++;
	sch->bstats.bytes += skb->len;
	sch->bstats.packets++;
	return 0;
}

static int tbf_requeue(struct sk_buff *skb, struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	int ret;

	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
		sch->q.qlen++;
		sch->qstats.requeues++;
	}

	return ret;
}

static unsigned int tbf_drop(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	unsigned int len = 0;

	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
		sch->q.qlen--;
		sch->qstats.drops++;
	}
	return len;
}

static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;

	skb = q->qdisc->dequeue(q->qdisc);

	if (skb) {
		psched_time_t now;
		long toks;
		long ptoks = 0;
		unsigned int len = skb->len;

		now = psched_get_time();
		toks = psched_tdiff_bounded(now, q->t_c, q->buffer);

		if (q->P_tab) {
			ptoks = toks + q->ptokens;
			if (ptoks > (long)q->mtu)
				ptoks = q->mtu;
			ptoks -= L2T_P(q, len);
		}
		toks += q->tokens;
		if (toks > (long)q->buffer)
			toks = q->buffer;
		toks -= L2T(q, len);

		/* Both buckets are non-negative iff the OR of the two
		   signed values is non-negative. */
		if ((toks|ptoks) >= 0) {
			q->t_c = now;
			q->tokens = toks;
			q->ptokens = ptoks;
			sch->q.qlen--;
			sch->flags &= ~TCQ_F_THROTTLED;
			return skb;
		}

		qdisc_watchdog_schedule(&q->watchdog,
					now + max_t(long, -toks, -ptoks));

		/* Maybe we have a shorter packet in the queue,
		   which can be sent now. It sounds cool,
		   but, however, this is wrong in principle.
		   We MUST NOT reorder packets under these circumstances.

		   Really, if we split the flow into independent
		   subflows, it would be a very good solution.
		   This is the main idea of all FQ algorithms
		   (cf. CSZ, HPFQ, HFSC)
		 */

		if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
			/* When requeue fails skb is dropped */
			qdisc_tree_decrease_qlen(q->qdisc, 1);
			sch->qstats.drops++;
		}

		sch->qstats.overlimits++;
	}
	return NULL;
}

static void tbf_reset(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_reset(q->qdisc);
	sch->q.qlen = 0;
	q->t_c = psched_get_time();
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	qdisc_watchdog_cancel(&q->watchdog);
}

static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS]	= { .len = sizeof(struct tc_tbf_qopt) },
	[TCA_TBF_RTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	[TCA_TBF_PTAB]	= { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
};

static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab = NULL;
	struct qdisc_rate_table *ptab = NULL;
	struct Qdisc *child = NULL;
	int max_size, n;

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

	/* max_size is the largest packet that can ever be sent: the
	   largest size whose transmission time, looked up in the rate
	   table, still fits into the configured bucket. */
	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer)
			break;
	max_size = (n << qopt->rate.cell_log) - 1;
	if (ptab) {
		int size;

		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu)
				break;
		size = (n << qopt->peakrate.cell_log) - 1;
		if (size < max_size)
			max_size = size;
	}
	if (max_size < 0)
		goto done;

	if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(xchg(&q->qdisc, child));
	}
	q->limit = qopt->limit;
	q->mtu = qopt->mtu;
	q->max_size = max_size;
	q->buffer = qopt->buffer;
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	rtab = xchg(&q->R_tab, rtab);
	ptab = xchg(&q->P_tab, ptab);
	sch_tree_unlock(sch);
	err = 0;
done:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);
	return err;
}

static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (opt == NULL)
		return -EINVAL;

	q->t_c = psched_get_time();
	qdisc_watchdog_init(&q->watchdog, sch);
	q->qdisc = &noop_qdisc;

	return tbf_change(sch, opt);
}

static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	qdisc_watchdog_cancel(&q->watchdog);

	if (q->P_tab)
		qdisc_put_rtab(q->P_tab);
	if (q->R_tab)
		qdisc_put_rtab(q->R_tab);

	qdisc_destroy(q->qdisc);
}

static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *nest;
	struct tc_tbf_qopt opt;

	nest = nla_nest_start(skb, TCA_OPTIONS);
	if (nest == NULL)
		goto nla_put_failure;

	opt.limit = q->limit;
	opt.rate = q->R_tab->rate;
	if (q->P_tab)
		opt.peakrate = q->P_tab->rate;
	else
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	opt.mtu = q->mtu;
	opt.buffer = q->buffer;
	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (cl != 1)	/* only one class */
		return -ENOENT;

	tcm->tcm_handle |= TC_H_MIN(1);
	tcm->tcm_info = q->qdisc->handle;

	return 0;
}

static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *q = qdisc_priv(sch);

	if (new == NULL)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	*old = xchg(&q->qdisc, new);
	qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
	qdisc_reset(*old);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

static int tbf_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}

static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (!walker->stop) {
		if (walker->count >= walker->skip)
			if (walker->fn(sch, 1, walker) < 0) {
				walker->stop = 1;
				return;
			}
		walker->count++;
	}
}

static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}

static const struct Qdisc_class_ops tbf_class_ops =
{
	.graft		= tbf_graft,
	.leaf		= tbf_leaf,
	.get		= tbf_get,
	.put		= tbf_put,
	.change		= tbf_change_class,
	.delete		= tbf_delete,
	.walk		= tbf_walk,
	.tcf_chain	= tbf_find_tcf,
	.dump		= tbf_dump_class,
};

static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next		= NULL,
	.cl_ops		= &tbf_class_ops,
	.id		= "tbf",
	.priv_size	= sizeof(struct tbf_sched_data),
	.enqueue	= tbf_enqueue,
	.dequeue	= tbf_dequeue,
	.requeue	= tbf_requeue,
	.drop		= tbf_drop,
	.init		= tbf_init,
	.reset		= tbf_reset,
	.destroy	= tbf_destroy,
	.change		= tbf_change,
	.dump		= tbf_dump,
	.owner		= THIS_MODULE,
};

static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}

module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");