[linux-2.6-block.git] / net / sched / sch_tbf.c

/*
 * net/sched/sch_tbf.c	Token Bucket Filter queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *		Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
 *						 original idea by Martin Devera
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>


/*	Simple Token Bucket Filter.
	=======================================

	SOURCE.
	-------

	None.

	Description.
	------------

	A data flow obeys TBF with rate R and depth B, if for any
	time interval t_i...t_f the number of transmitted bits
	does not exceed B + R*(t_f-t_i).

	Packetized version of this definition:
	The sequence of packets of sizes s_i served at moments t_i
	obeys TBF, if for any i<=k:

	s_i+....+s_k <= B + R*(t_k - t_i)

	Algorithm.
	----------

	Let N(t_i) be B/R initially and N(t) grow continuously with time as:

	N(t+delta) = min{B/R, N(t) + delta}

	If the first packet in queue has length S, it may be
	transmitted only at the time t_* when S/R <= N(t_*),
	and in this case N(t) jumps:

	N(t_* + 0) = N(t_* - 0) - S/R.


	Actually, QoS requires two TBF to be applied to a data stream.
	One of them controls steady state burst size, another
	one with rate P (peak rate) and depth M (equal to link MTU)
	limits bursts at a smaller time scale.

	It is easy to see that P>R, and B>M. If P is infinity, this double
	TBF is equivalent to a single one.

	When TBF works in reshaping mode, latency is estimated as:

	lat = max ((L-B)/R, (L-M)/P)


	NOTES.
	------

	If TBF throttles, it starts a watchdog timer, which will wake it up
	when it is ready to transmit.
	Note that the minimal timer resolution is 1/HZ.
	If no new packets arrive during this period,
	or if the device is not awakened by EOI for some previous packet,
	TBF can stop its activity for 1/HZ.


	This means that, with depth B, the maximal rate is

	R_crit = B*HZ

	E.g. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.

	Note that the peak rate TBF is much tougher: with MTU 1500
	P_crit = 150Kbytes/sec. So, if you need greater peak
	rates, use alpha with HZ=1000 :-)

	With classful TBF, limit is just kept for backwards compatibility.
	It is passed to the default bfifo qdisc - if the inner qdisc is
	changed the limit is not effective anymore.
*/

/*
 * Private state of one TBF qdisc instance (obtained via qdisc_priv()).
 *
 * The "Parameters" group is written by tbf_change(); the "Variables"
 * group is the running bucket state updated by tbf_dequeue() and
 * refilled by tbf_reset()/tbf_change().
 */
struct tbf_sched_data
{
/* Parameters */
	u32		limit;		/* Maximal length of backlog: bytes */
	u32		buffer;		/* Token bucket depth/rate: MUST BE >= MTU/B */
	/* Upper bound applied to 'ptokens' in tbf_dequeue() (peak bucket depth) */
	u32		mtu;
	/*
	 * Longest packet accepted by tbf_enqueue(); computed in tbf_change()
	 * from the rate tables, 'buffer' and 'mtu'.
	 */
	u32		max_size;
	/* Rate table; always set once tbf_change() has succeeded */
	struct qdisc_rate_table	*R_tab;
	/* Peak-rate table; NULL when no peak rate is configured */
	struct qdisc_rate_table	*P_tab;

/* Variables */
	long	tokens;			/* Current number of B tokens */
	long	ptokens;		/* Current number of P tokens */
	psched_time_t	t_c;		/* Time check-point */
	struct Qdisc	*qdisc;		/* Inner qdisc, default - bfifo queue */
	struct qdisc_watchdog watchdog;	/* Watchdog timer */
};

/*
 * Token cost of a packet of the given length, looked up with qdisc_l2t()
 * in the rate table (L2T) or in the peak-rate table (L2T_P) of the
 * tbf_sched_data pointed to by 'sd'.
 */
#define L2T(sd, bytes)   qdisc_l2t((sd)->R_tab, (bytes))
#define L2T_P(sd, bytes) qdisc_l2t((sd)->P_tab, (bytes))

/*
 * Enqueue a packet on the inner qdisc.
 *
 * Packets longer than max_size are dropped right away. Otherwise the
 * packet is handed to the inner qdisc; on success the queue length and
 * byte/packet counters of this qdisc are updated and 0 is returned.
 * On failure the inner qdisc's return code is passed through.
 */
static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	int status;

	if (qdisc_pkt_len(skb) > priv->max_size) {
		/* Too long to ever be paid for by the configured buckets. */
		sch->qstats.drops++;
#ifdef CONFIG_NET_CLS_ACT
		/* Free the skb unless a reshape_fail hook exists and returns 0. */
		if (!sch->reshape_fail || sch->reshape_fail(skb, sch))
#endif
			kfree_skb(skb);

		return NET_XMIT_DROP;
	}

	status = qdisc_enqueue(skb, priv->qdisc);
	if (status == 0) {
		sch->q.qlen++;
		/* Length is read after the inner enqueue, as before. */
		sch->bstats.bytes += qdisc_pkt_len(skb);
		sch->bstats.packets++;
		return 0;
	}

	/* Inner qdisc refused the packet: count it only if it is a drop. */
	if (net_xmit_drop_count(status))
		sch->qstats.drops++;

	return status;
}

/*
 * Put a packet back on the inner qdisc through its ->requeue() op.
 *
 * Returns the inner qdisc's result; when that is 0 the queue length and
 * the requeue counter of this qdisc are incremented.
 */
static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	struct Qdisc *inner = priv->qdisc;
	int status = inner->ops->requeue(skb, inner);

	if (status != 0)
		return status;

	sch->q.qlen++;
	sch->qstats.requeues++;
	return 0;
}

/*
 * Ask the inner qdisc to drop one packet.
 *
 * Returns whatever the inner ->drop() op returned, or 0 when the inner
 * qdisc has no ->drop() op. A non-zero result is accounted as one drop
 * and one packet less on this qdisc.
 */
static unsigned int tbf_drop(struct Qdisc* sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	struct Qdisc *inner = priv->qdisc;
	unsigned int dropped;

	if (!inner->ops->drop)
		return 0;

	dropped = inner->ops->drop(inner);
	if (dropped == 0)
		return 0;

	sch->q.qlen--;
	sch->qstats.drops++;
	return dropped;
}

/*
 * Dequeue the next packet if both token buckets can pay for it.
 *
 * The head packet is taken from the inner qdisc and the buckets are
 * refilled by the time elapsed since the last check-point (bounded by
 * 'buffer', and each bucket capped at its depth). If, after charging
 * the packet, neither bucket goes negative, the new state is committed
 * and the packet is returned. Otherwise the watchdog is armed for the
 * moment the larger deficit is repaid, the packet is put back on the
 * inner qdisc and NULL is returned.
 */
static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	struct Qdisc *inner = priv->qdisc;
	struct sk_buff *skb;
	psched_time_t now;
	unsigned int len;
	long elapsed;
	long toks;
	long ptoks = 0;	/* stays 0 (never negative) without a peak table */

	skb = inner->dequeue(inner);
	if (!skb)
		return NULL;

	len = qdisc_pkt_len(skb);
	now = psched_get_time();
	elapsed = psched_tdiff_bounded(now, priv->t_c, priv->buffer);

	if (priv->P_tab) {
		/* Peak bucket: refill, cap at mtu, charge the packet. */
		ptoks = elapsed + priv->ptokens;
		if (ptoks > (long)priv->mtu)
			ptoks = priv->mtu;
		ptoks -= L2T_P(priv, len);
	}

	/* Main bucket: refill, cap at buffer, charge the packet. */
	toks = elapsed + priv->tokens;
	if (toks > (long)priv->buffer)
		toks = priv->buffer;
	toks -= L2T(priv, len);

	/* The OR is negative as soon as either bucket is overdrawn. */
	if ((toks | ptoks) < 0) {
		qdisc_watchdog_schedule(&priv->watchdog,
					now + max_t(long, -toks, -ptoks));

		/*
		 * A shorter packet further back in the queue might fit
		 * right now, but sending it would reorder the flow, which
		 * we MUST NOT do here. Splitting the flow into independent
		 * subflows would be the proper answer; that is the main
		 * idea of all FQ algorithms (cf. CSZ, HPFQ, HFSC).
		 */
		if (inner->ops->requeue(skb, inner) != NET_XMIT_SUCCESS) {
			/* When requeue fails skb is dropped */
			qdisc_tree_decrease_qlen(inner, 1);
			sch->qstats.drops++;
		}

		sch->qstats.overlimits++;
		return NULL;
	}

	/* Enough tokens: commit the new bucket state and send. */
	priv->t_c = now;
	priv->tokens = toks;
	priv->ptokens = ptoks;
	sch->q.qlen--;
	sch->flags &= ~TCQ_F_THROTTLED;
	return skb;
}

/*
 * Reset the qdisc: reset the inner qdisc, zero our queue length, refill
 * both buckets to their configured depth, restart the time check-point
 * and cancel a pending watchdog.
 */
static void tbf_reset(struct Qdisc* sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);

	qdisc_reset(priv->qdisc);
	sch->q.qlen = 0;

	/* Full buckets, measured from now. */
	priv->tokens = priv->buffer;
	priv->ptokens = priv->mtu;
	priv->t_c = psched_get_time();

	qdisc_watchdog_cancel(&priv->watchdog);
}

/*
 * Netlink attribute policy handed to nla_parse_nested() in tbf_change():
 * PARMS is sized after struct tc_tbf_qopt, RTAB and PTAB are binary
 * blobs bounded by TC_RTAB_SIZE.
 */
static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	[TCA_TBF_PARMS] = {
		.len = sizeof(struct tc_tbf_qopt),
	},
	[TCA_TBF_RTAB] = {
		.type = NLA_BINARY,
		.len = TC_RTAB_SIZE,
	},
	[TCA_TBF_PTAB] = {
		.type = NLA_BINARY,
		.len = TC_RTAB_SIZE,
	},
};

static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
{
	int err;
	struct tbf_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_TBF_PTAB + 1];
	struct tc_tbf_qopt *qopt;
	struct qdisc_rate_table *rtab = NULL;
	struct qdisc_rate_table *ptab = NULL;
	struct Qdisc *child = NULL;
	int max_size,n;

	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
	if (err < 0)
		return err;

	err = -EINVAL;
	if (tb[TCA_TBF_PARMS] == NULL)
		goto done;

	qopt = nla_data(tb[TCA_TBF_PARMS]);
	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
	if (rtab == NULL)
		goto done;

	if (qopt->peakrate.rate) {
		if (qopt->peakrate.rate > qopt->rate.rate)
			ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
		if (ptab == NULL)
			goto done;
	}

	for (n = 0; n < 256; n++)
		if (rtab->data[n] > qopt->buffer) break;
	max_size = (n << qopt->rate.cell_log)-1;
	if (ptab) {
		int size;

		for (n = 0; n < 256; n++)
			if (ptab->data[n] > qopt->mtu) break;
		size = (n << qopt->peakrate.cell_log)-1;
		if (size < max_size) max_size = size;
	}
	if (max_size < 0)
		goto done;

	if (qopt->limit > 0) {
		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
		if (IS_ERR(child)) {
			err = PTR_ERR(child);
			goto done;
		}
	}

	sch_tree_lock(sch);
	if (child) {
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(xchg(&q->qdisc, child));
	}
	q->limit = qopt->limit;
	q->mtu = qopt->mtu;
	q->max_size = max_size;
	q->buffer = qopt->buffer;
	q->tokens = q->buffer;
	q->ptokens = q->mtu;
	rtab = xchg(&q->R_tab, rtab);
	ptab = xchg(&q->P_tab, ptab);
	sch_tree_unlock(sch);
	err = 0;
done:
	if (rtab)
		qdisc_put_rtab(rtab);
	if (ptab)
		qdisc_put_rtab(ptab);
	return err;
}

/*
 * Initialise a new TBF qdisc.
 *
 * Options are mandatory (-EINVAL without them). The inner qdisc starts
 * out as noop_qdisc, the watchdog is set up and the time check-point is
 * taken; the actual configuration is then delegated to tbf_change(),
 * whose result is returned.
 */
static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
{
	struct tbf_sched_data *priv;

	if (!opt)
		return -EINVAL;

	priv = qdisc_priv(sch);
	priv->qdisc = &noop_qdisc;
	qdisc_watchdog_init(&priv->watchdog, sch);
	priv->t_c = psched_get_time();

	return tbf_change(sch, opt);
}

/*
 * Tear down the qdisc: cancel the watchdog, release the peak-rate and
 * rate tables if present, and destroy the inner qdisc.
 */
static void tbf_destroy(struct Qdisc *sch)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	struct qdisc_rate_table *peak_tab = priv->P_tab;
	struct qdisc_rate_table *rate_tab = priv->R_tab;

	qdisc_watchdog_cancel(&priv->watchdog);

	if (peak_tab)
		qdisc_put_rtab(peak_tab);
	if (rate_tab)
		qdisc_put_rtab(rate_tab);

	qdisc_destroy(priv->qdisc);
}

/*
 * Dump the current configuration into @skb as a TCA_OPTIONS nest holding
 * one TCA_TBF_PARMS attribute (struct tc_tbf_qopt).
 *
 * Returns skb->len on success. On failure the nest is cancelled and -1
 * is returned.
 */
static int tbf_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	struct tc_tbf_qopt opt;
	struct nlattr *nest = nla_nest_start(skb, TCA_OPTIONS);

	if (!nest)
		goto nla_put_failure;

	opt.limit = priv->limit;
	opt.buffer = priv->buffer;
	opt.mtu = priv->mtu;
	opt.rate = priv->R_tab->rate;
	/* Without a peak-rate table report an all-zero peak rate. */
	if (!priv->P_tab)
		memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	else
		opt.peakrate = priv->P_tab->rate;

	/* NOTE(review): NLA_PUT presumably jumps to nla_put_failure on error. */
	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);

	nla_nest_end(skb, nest);
	return skb->len;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -1;
}

/*
 * Describe the single class of this qdisc in @tcm.
 *
 * Only class 1 exists; any other @cl yields -ENOENT. For class 1 the
 * minor number 1 is OR-ed into tcm_handle and tcm_info is set to the
 * handle of the inner qdisc.
 */
static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
			  struct sk_buff *skb, struct tcmsg *tcm)
{
	struct tbf_sched_data *priv;

	/* only one class */
	if (cl != 1)
		return -ENOENT;

	priv = qdisc_priv(sch);
	tcm->tcm_info = priv->qdisc->handle;
	tcm->tcm_handle |= TC_H_MIN(1);

	return 0;
}

/*
 * Replace the inner qdisc with @new (noop_qdisc when @new is NULL).
 *
 * Under the qdisc tree lock the previous inner qdisc is swapped out,
 * stored in *@old, its queued packets are subtracted via
 * qdisc_tree_decrease_qlen() and it is reset. Always returns 0.
 */
static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
		     struct Qdisc **old)
{
	struct tbf_sched_data *priv = qdisc_priv(sch);
	struct Qdisc *prev;

	if (!new)
		new = &noop_qdisc;

	sch_tree_lock(sch);
	prev = xchg(&priv->qdisc, new);
	*old = prev;
	qdisc_tree_decrease_qlen(prev, prev->q.qlen);
	qdisc_reset(prev);
	sch_tree_unlock(sch);

	return 0;
}

static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg)
{
	struct tbf_sched_data *q = qdisc_priv(sch);
	return q->qdisc;
}

/*
 * Class lookup: always returns 1, whatever @classid is. 1 is the only
 * class identifier accepted by tbf_dump_class() and reported by
 * tbf_walk().
 */
static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
{
	return 1;
}

/* Counterpart of tbf_get(); intentionally does nothing. */
static void tbf_put(struct Qdisc *sch, unsigned long arg)
{
}

/* Changing or creating classes is not supported: always -ENOSYS. */
static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
			    struct nlattr **tca, unsigned long *arg)
{
	return -ENOSYS;
}

/* Deleting a class is not supported: always -ENOSYS. */
static int tbf_delete(struct Qdisc *sch, unsigned long arg)
{
	return -ENOSYS;
}

/*
 * Class walker: visits the single class (id 1).
 *
 * Nothing happens once walker->stop is set. The callback is invoked
 * only when the walker is past its skip count; a negative callback
 * result sets walker->stop and leaves the count untouched. Otherwise
 * the count is incremented.
 */
static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
	if (walker->stop)
		return;

	if (walker->count >= walker->skip &&
	    walker->fn(sch, 1, walker) < 0) {
		walker->stop = 1;
		return;
	}

	walker->count++;
}

/* No filter chain is kept for this qdisc: always returns NULL. */
static struct tcf_proto **tbf_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	return NULL;
}

/* Class operations: TBF exposes exactly one class wrapping the inner qdisc. */
static const struct Qdisc_class_ops tbf_class_ops = {
	.graft = tbf_graft,
	.leaf = tbf_leaf,
	.get = tbf_get,
	.put = tbf_put,
	.change = tbf_change_class,
	.delete = tbf_delete,
	.walk = tbf_walk,
	.tcf_chain = tbf_find_tcf,
	.dump = tbf_dump_class,
};

/* Qdisc operations table registered under the id "tbf". */
static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
	.next = NULL,
	.cl_ops = &tbf_class_ops,
	.id = "tbf",
	.priv_size = sizeof(struct tbf_sched_data),
	.enqueue = tbf_enqueue,
	.dequeue = tbf_dequeue,
	.requeue = tbf_requeue,
	.drop = tbf_drop,
	.init = tbf_init,
	.reset = tbf_reset,
	.destroy = tbf_destroy,
	.change = tbf_change,
	.dump = tbf_dump,
	.owner = THIS_MODULE,
};

/* Module entry point: registers tbf_qdisc_ops and returns the result. */
static int __init tbf_module_init(void)
{
	return register_qdisc(&tbf_qdisc_ops);
}

/* Module exit point: unregisters tbf_qdisc_ops. */
static void __exit tbf_module_exit(void)
{
	unregister_qdisc(&tbf_qdisc_ops);
}
/* Declare the module's init/exit routines and its license. */
module_init(tbf_module_init)
module_exit(tbf_module_exit)
MODULE_LICENSE("GPL");
Commit	Line	Data
1da177e4 LT	1	/*
	2	* net/sched/sch_tbf.c Token Bucket Filter queue.
	3	*
	4	* This program is free software; you can redistribute it and/or
	5	* modify it under the terms of the GNU General Public License
	6	* as published by the Free Software Foundation; either version
	7	* 2 of the License, or (at your option) any later version.
	8	*
	9	* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
	10	* Dmitry Torokhov <dtor@mail.ru> - allow attaching inner qdiscs -
	11	* original idea by Martin Devera
	12	*
	13	*/
	14
1da177e4	15	#include <linux/module.h>
1da177e4 LT	16	#include <linux/types.h>
1da177e4 LT	17	#include <linux/kernel.h>
1da177e4	18	#include <linux/string.h>
1da177e4	19	#include <linux/errno.h>
1da177e4	20	#include <linux/skbuff.h>
0ba48053	21	#include <net/netlink.h>
1da177e4 LT	22	#include <net/pkt_sched.h>
	23
	24
	25	/* Simple Token Bucket Filter.
	26	=======================================
	27
	28	SOURCE.
	29	-------
	30
	31	None.
	32
	33	Description.
	34	------------
	35
	36	A data flow obeys TBF with rate R and depth B, if for any
	37	time interval t_i...t_f the number of transmitted bits
	38	does not exceed B + R*(t_f-t_i).
	39
	40	Packetized version of this definition:
	41	The sequence of packets of sizes s_i served at moments t_i
	42	obeys TBF, if for any i<=k:
	43
	44	s_i+....+s_k <= B + R*(t_k - t_i)
	45
	46	Algorithm.
	47	----------
	48
	49	Let N(t_i) be B/R initially and N(t) grow continuously with time as:
	50
	51	N(t+delta) = min{B/R, N(t) + delta}
	52
	53	If the first packet in queue has length S, it may be
	54	transmitted only at the time t_* when S/R <= N(t_*),
	55	and in this case N(t) jumps:
	56
	57	N(t_* + 0) = N(t_* - 0) - S/R.
	58
	59
	60
	61	Actually, QoS requires two TBF to be applied to a data stream.
	62	One of them controls steady state burst size, another
	63	one with rate P (peak rate) and depth M (equal to link MTU)
	64	limits bursts at a smaller time scale.
	65
	66	It is easy to see that P>R, and B>M. If P is infinity, this double
	67	TBF is equivalent to a single one.
	68
	69	When TBF works in reshaping mode, latency is estimated as:
	70
	71	lat = max ((L-B)/R, (L-M)/P)
	72
	73
	74	NOTES.
	75	------
	76
	77	If TBF throttles, it starts a watchdog timer, which will wake it up
	78	when it is ready to transmit.
	79	Note that the minimal timer resolution is 1/HZ.
	80	If no new packets arrive during this period,
	81	or if the device is not awaken by EOI for some previous packet,
	82	TBF can stop its activity for 1/HZ.
	83
	84
	85	This means, that with depth B, the maximal rate is
86
87	R_crit = B*HZ
88
89	F.e. for 10Mbit ethernet and HZ=100 the minimal allowed B is ~10Kbytes.
90
91	Note that the peak rate TBF is much more tough: with MTU 1500
92	P_crit = 150Kbytes/sec. So, if you need greater peak
93	rates, use alpha with HZ=1000 :-)
94
95	With classful TBF, limit is just kept for backwards compatibility.
96	It is passed to the default bfifo qdisc - if the inner qdisc is
97	changed the limit is not effective anymore.
98	*/
99
100	struct tbf_sched_data
101	{
102	/* Parameters */
103	u32 limit; /* Maximal length of backlog: bytes */
104	u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
105	u32 mtu;
106	u32 max_size;
107	struct qdisc_rate_table *R_tab;
108	struct qdisc_rate_table *P_tab;
109
110	/* Variables */
111	long tokens; /* Current number of B tokens */
112	long ptokens; /* Current number of P tokens */
113	psched_time_t t_c; /* Time check-point */
1da177e4	114	struct Qdisc qdisc; / Inner qdisc, default - bfifo queue */
f7f593e3	115	struct qdisc_watchdog watchdog; /* Watchdog timer */
1da177e4 LT	116	};
1da177e4 LT	117
e9bef55d JDB	118	#define L2T(q,L) qdisc_l2t((q)->R_tab,L)
e9bef55d JDB	119	#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L)
1da177e4 LT	120
	121	static int tbf_enqueue(struct sk_buff skb, struct Qdisc sch)
	122	{
	123	struct tbf_sched_data *q = qdisc_priv(sch);
	124	int ret;
	125
0abf77e5	126	if (qdisc_pkt_len(skb) > q->max_size) {
1da177e4	127	sch->qstats.drops++;
c3bc7cff	128	#ifdef CONFIG_NET_CLS_ACT
1da177e4 LT	129	if (sch->reshape_fail == NULL \|\| sch->reshape_fail(skb, sch))
	130	#endif
	131	kfree_skb(skb);
	132
	133	return NET_XMIT_DROP;
	134	}
	135
5f86173b JK	136	ret = qdisc_enqueue(skb, q->qdisc);
5f86173b JK	137	if (ret != 0) {
378a2f09 JP	138	if (net_xmit_drop_count(ret))
378a2f09 JP	139	sch->qstats.drops++;
1da177e4 LT	140	return ret;
	141	}
	142
	143	sch->q.qlen++;
0abf77e5	144	sch->bstats.bytes += qdisc_pkt_len(skb);
1da177e4 LT	145	sch->bstats.packets++;
	146	return 0;
	147	}
	148
	149	static int tbf_requeue(struct sk_buff skb, struct Qdisc sch)
	150	{
	151	struct tbf_sched_data *q = qdisc_priv(sch);
	152	int ret;
	153
	154	if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
	155	sch->q.qlen++;
	156	sch->qstats.requeues++;
	157	}
	158
	159	return ret;
	160	}
	161
	162	static unsigned int tbf_drop(struct Qdisc* sch)
	163	{
	164	struct tbf_sched_data *q = qdisc_priv(sch);
6d037a26	165	unsigned int len = 0;
1da177e4	166
6d037a26	167	if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) {
1da177e4 LT	168	sch->q.qlen--;
	169	sch->qstats.drops++;
	170	}
	171	return len;
	172	}
	173
1da177e4 LT	174	static struct sk_buff tbf_dequeue(struct Qdisc sch)
	175	{
	176	struct tbf_sched_data *q = qdisc_priv(sch);
	177	struct sk_buff *skb;
	178
	179	skb = q->qdisc->dequeue(q->qdisc);
	180
	181	if (skb) {
	182	psched_time_t now;
f7f593e3	183	long toks;
1da177e4	184	long ptoks = 0;
0abf77e5	185	unsigned int len = qdisc_pkt_len(skb);
1da177e4	186
3bebcda2	187	now = psched_get_time();
03cc45c0	188	toks = psched_tdiff_bounded(now, q->t_c, q->buffer);
1da177e4 LT	189
	190	if (q->P_tab) {
	191	ptoks = toks + q->ptokens;
	192	if (ptoks > (long)q->mtu)
	193	ptoks = q->mtu;
	194	ptoks -= L2T_P(q, len);
	195	}
	196	toks += q->tokens;
	197	if (toks > (long)q->buffer)
	198	toks = q->buffer;
	199	toks -= L2T(q, len);
	200
	201	if ((toks\|ptoks) >= 0) {
	202	q->t_c = now;
	203	q->tokens = toks;
	204	q->ptokens = ptoks;
	205	sch->q.qlen--;
	206	sch->flags &= ~TCQ_F_THROTTLED;
	207	return skb;
	208	}
	209
f7f593e3 PM	210	qdisc_watchdog_schedule(&q->watchdog,
f7f593e3 PM	211	now + max_t(long, -toks, -ptoks));
1da177e4 LT	212
	213	/* Maybe we have a shorter packet in the queue,
	214	which can be sent now. It sounds cool,
	215	but, however, this is wrong in principle.
	216	We MUST NOT reorder packets under these circumstances.
	217
	218	Really, if we split the flow into independent
	219	subflows, it would be a very good solution.
	220	This is the main idea of all FQ algorithms
	221	(cf. CSZ, HPFQ, HFSC)
	222	*/
	223
	224	if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
	225	/* When requeue fails skb is dropped */
e488eafc	226	qdisc_tree_decrease_qlen(q->qdisc, 1);
1da177e4 LT	227	sch->qstats.drops++;
	228	}
	229
1da177e4 LT	230	sch->qstats.overlimits++;
	231	}
	232	return NULL;
	233	}
	234
	235	static void tbf_reset(struct Qdisc* sch)
	236	{
	237	struct tbf_sched_data *q = qdisc_priv(sch);
	238
	239	qdisc_reset(q->qdisc);
	240	sch->q.qlen = 0;
3bebcda2	241	q->t_c = psched_get_time();
1da177e4 LT	242	q->tokens = q->buffer;
1da177e4 LT	243	q->ptokens = q->mtu;
f7f593e3	244	qdisc_watchdog_cancel(&q->watchdog);
1da177e4 LT	245	}
1da177e4 LT	246
27a3421e PM	247	static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
	248	[TCA_TBF_PARMS] = { .len = sizeof(struct tc_tbf_qopt) },
	249	[TCA_TBF_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	250	[TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
	251	};
	252
1e90474c	253	static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
1da177e4	254	{
cee63723	255	int err;
1da177e4	256	struct tbf_sched_data *q = qdisc_priv(sch);
1e90474c	257	struct nlattr *tb[TCA_TBF_PTAB + 1];
1da177e4 LT	258	struct tc_tbf_qopt *qopt;
	259	struct qdisc_rate_table *rtab = NULL;
	260	struct qdisc_rate_table *ptab = NULL;
	261	struct Qdisc *child = NULL;
	262	int max_size,n;
	263
27a3421e	264	err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
cee63723 PM	265	if (err < 0)
	266	return err;
	267
	268	err = -EINVAL;
27a3421e	269	if (tb[TCA_TBF_PARMS] == NULL)
1da177e4 LT	270	goto done;
1da177e4 LT	271
1e90474c PM	272	qopt = nla_data(tb[TCA_TBF_PARMS]);
1e90474c PM	273	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
1da177e4 LT	274	if (rtab == NULL)
	275	goto done;
	276
	277	if (qopt->peakrate.rate) {
	278	if (qopt->peakrate.rate > qopt->rate.rate)
1e90474c	279	ptab = qdisc_get_rtab(&qopt->peakrate, tb[TCA_TBF_PTAB]);
1da177e4 LT	280	if (ptab == NULL)
	281	goto done;
	282	}
	283
	284	for (n = 0; n < 256; n++)
	285	if (rtab->data[n] > qopt->buffer) break;
	286	max_size = (n << qopt->rate.cell_log)-1;
	287	if (ptab) {
	288	int size;
	289
	290	for (n = 0; n < 256; n++)
	291	if (ptab->data[n] > qopt->mtu) break;
	292	size = (n << qopt->peakrate.cell_log)-1;
	293	if (size < max_size) max_size = size;
	294	}
	295	if (max_size < 0)
	296	goto done;
	297
053cfed7	298	if (qopt->limit > 0) {
fb0305ce PM	299	child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
	300	if (IS_ERR(child)) {
	301	err = PTR_ERR(child);
1da177e4	302	goto done;
fb0305ce	303	}
1da177e4 LT	304	}
	305
	306	sch_tree_lock(sch);
5e50da01 PM	307	if (child) {
5e50da01 PM	308	qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
053cfed7	309	qdisc_destroy(xchg(&q->qdisc, child));
5e50da01	310	}
1da177e4 LT	311	q->limit = qopt->limit;
	312	q->mtu = qopt->mtu;
	313	q->max_size = max_size;
	314	q->buffer = qopt->buffer;
	315	q->tokens = q->buffer;
	316	q->ptokens = q->mtu;
	317	rtab = xchg(&q->R_tab, rtab);
	318	ptab = xchg(&q->P_tab, ptab);
	319	sch_tree_unlock(sch);
	320	err = 0;
	321	done:
	322	if (rtab)
	323	qdisc_put_rtab(rtab);
	324	if (ptab)
	325	qdisc_put_rtab(ptab);
	326	return err;
	327	}
	328
1e90474c	329	static int tbf_init(struct Qdisc* sch, struct nlattr *opt)
1da177e4 LT	330	{
	331	struct tbf_sched_data *q = qdisc_priv(sch);
	332
	333	if (opt == NULL)
	334	return -EINVAL;
	335
3bebcda2	336	q->t_c = psched_get_time();
f7f593e3	337	qdisc_watchdog_init(&q->watchdog, sch);
1da177e4 LT	338	q->qdisc = &noop_qdisc;
	339
	340	return tbf_change(sch, opt);
	341	}
	342
	343	static void tbf_destroy(struct Qdisc *sch)
	344	{
	345	struct tbf_sched_data *q = qdisc_priv(sch);
	346
f7f593e3	347	qdisc_watchdog_cancel(&q->watchdog);
1da177e4 LT	348
	349	if (q->P_tab)
	350	qdisc_put_rtab(q->P_tab);
	351	if (q->R_tab)
	352	qdisc_put_rtab(q->R_tab);
	353
	354	qdisc_destroy(q->qdisc);
	355	}
	356
	357	static int tbf_dump(struct Qdisc sch, struct sk_buff skb)
	358	{
	359	struct tbf_sched_data *q = qdisc_priv(sch);
4b3550ef	360	struct nlattr *nest;
1da177e4 LT	361	struct tc_tbf_qopt opt;
1da177e4 LT	362
4b3550ef PM	363	nest = nla_nest_start(skb, TCA_OPTIONS);
	364	if (nest == NULL)
	365	goto nla_put_failure;
1da177e4 LT	366
	367	opt.limit = q->limit;
	368	opt.rate = q->R_tab->rate;
	369	if (q->P_tab)
	370	opt.peakrate = q->P_tab->rate;
	371	else
	372	memset(&opt.peakrate, 0, sizeof(opt.peakrate));
	373	opt.mtu = q->mtu;
	374	opt.buffer = q->buffer;
1e90474c	375	NLA_PUT(skb, TCA_TBF_PARMS, sizeof(opt), &opt);
1da177e4	376
4b3550ef	377	nla_nest_end(skb, nest);
1da177e4 LT	378	return skb->len;
1da177e4 LT	379
1e90474c	380	nla_put_failure:
4b3550ef	381	nla_nest_cancel(skb, nest);
1da177e4 LT	382	return -1;
	383	}
	384
	385	static int tbf_dump_class(struct Qdisc *sch, unsigned long cl,
	386	struct sk_buff skb, struct tcmsg tcm)
	387	{
	388	struct tbf_sched_data *q = qdisc_priv(sch);
	389
	390	if (cl != 1) /* only one class */
	391	return -ENOENT;
	392
	393	tcm->tcm_handle \|= TC_H_MIN(1);
	394	tcm->tcm_info = q->qdisc->handle;
	395
	396	return 0;
	397	}
	398
	399	static int tbf_graft(struct Qdisc sch, unsigned long arg, struct Qdisc new,
	400	struct Qdisc **old)
	401	{
	402	struct tbf_sched_data *q = qdisc_priv(sch);
	403
	404	if (new == NULL)
	405	new = &noop_qdisc;
	406
	407	sch_tree_lock(sch);
	408	*old = xchg(&q->qdisc, new);
5e50da01	409	qdisc_tree_decrease_qlen(old, (old)->q.qlen);
1da177e4	410	qdisc_reset(*old);
1da177e4 LT	411	sch_tree_unlock(sch);
	412
	413	return 0;
	414	}
	415
	416	static struct Qdisc tbf_leaf(struct Qdisc sch, unsigned long arg)
	417	{
	418	struct tbf_sched_data *q = qdisc_priv(sch);
	419	return q->qdisc;
	420	}
	421
	422	static unsigned long tbf_get(struct Qdisc *sch, u32 classid)
	423	{
	424	return 1;
	425	}
	426
	427	static void tbf_put(struct Qdisc *sch, unsigned long arg)
	428	{
	429	}
	430
10297b99	431	static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1e90474c	432	struct nlattr *tca, unsigned long arg)
1da177e4 LT	433	{
	434	return -ENOSYS;
	435	}
	436
	437	static int tbf_delete(struct Qdisc *sch, unsigned long arg)
	438	{
	439	return -ENOSYS;
	440	}
	441
	442	static void tbf_walk(struct Qdisc sch, struct qdisc_walker walker)
	443	{
	444	if (!walker->stop) {
	445	if (walker->count >= walker->skip)
	446	if (walker->fn(sch, 1, walker) < 0) {
	447	walker->stop = 1;
	448	return;
	449	}
	450	walker->count++;
	451	}
	452	}
	453
	454	static struct tcf_proto *tbf_find_tcf(struct Qdisc sch, unsigned long cl)
	455	{
	456	return NULL;
	457	}
	458
20fea08b	459	static const struct Qdisc_class_ops tbf_class_ops =
1da177e4 LT	460	{
	461	.graft = tbf_graft,
	462	.leaf = tbf_leaf,
	463	.get = tbf_get,
	464	.put = tbf_put,
	465	.change = tbf_change_class,
	466	.delete = tbf_delete,
	467	.walk = tbf_walk,
	468	.tcf_chain = tbf_find_tcf,
	469	.dump = tbf_dump_class,
	470	};
	471
20fea08b	472	static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
1da177e4 LT	473	.next = NULL,
	474	.cl_ops = &tbf_class_ops,
	475	.id = "tbf",
	476	.priv_size = sizeof(struct tbf_sched_data),
	477	.enqueue = tbf_enqueue,
	478	.dequeue = tbf_dequeue,
	479	.requeue = tbf_requeue,
	480	.drop = tbf_drop,
	481	.init = tbf_init,
	482	.reset = tbf_reset,
	483	.destroy = tbf_destroy,
	484	.change = tbf_change,
	485	.dump = tbf_dump,
	486	.owner = THIS_MODULE,
	487	};
	488
	489	static int __init tbf_module_init(void)
	490	{
	491	return register_qdisc(&tbf_qdisc_ops);
	492	}
	493
	494	static void __exit tbf_module_exit(void)
	495	{
	496	unregister_qdisc(&tbf_qdisc_ops);
	497	}
	498	module_init(tbf_module_init)
	499	module_exit(tbf_module_exit)
	500	MODULE_LICENSE("GPL");