[linux-2.6-block.git] / lib / proportions.c

/*
 * Floating proportions
 *
 *  Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, ie.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Further more, if we measure time (t) in the same events as x; so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2:
 *
 * Can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonic increasing property can be applied to mitigate the wrap-
 *   around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */

#include <linux/proportions.h>
#include <linux/rcupdate.h>

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	int err;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	err = percpu_counter_init(&pd->pg[0].events, 0);
	if (err)
		goto out;

	err = percpu_counter_init(&pd->pg[1].events, 0);
	if (err)
		percpu_counter_destroy(&pd->pg[0].events);

out:
	return err;
}

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;
	if (!offset)
		goto out;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_set(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

	synchronize_rcu();

out:
	mutex_unlock(&pd->mutex);
}

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
__acquires(RCU)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from vcd_flip()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
__releases(RCU)
{
	rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}

/*
 * PERCPU
 */

#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	return percpu_counter_init(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

	/*
	 * For each missed period, we half the local counter.
	 * basically:
	 *   pl->events >> (global_period - pl->period);
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (period < BITS_PER_LONG) {
		s64 val = percpu_counter_read(&pl->events);

		if (val < (nr_cpu_ids * PROP_BATCH))
			val = percpu_counter_sum(&pl->events);

		__percpu_counter_add(&pl->events, -val + (val >> period),
					PROP_BATCH);
	} else
		percpu_counter_set(&pl->events, 0);

	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
			   struct prop_local_percpu *pl, long frac)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);

	if (unlikely(frac != PROP_FRAC_BASE)) {
		unsigned long period_2 = 1UL << (pg->shift - 1);
		unsigned long counter_mask = period_2 - 1;
		unsigned long global_count;
		long numerator, denominator;

		numerator = percpu_counter_read_positive(&pl->events);
		global_count = percpu_counter_read(&pg->events);
		denominator = period_2 + (global_count & counter_mask);

		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
			goto out_put;
	}

	percpu_counter_add(&pl->events, 1);
	percpu_counter_add(&pg->events, 1);

out_put:
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
		struct prop_local_percpu *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;
	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we half the local counter.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (likely(period < BITS_PER_LONG))
		pl->events >>= period;
	else
		pl->events = 0;
	pl->period = global_period;
	spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 *   ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_single(pg, pl);
	pl->events++;
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
	       	struct prop_local_single *pl,
		long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_single(pg, pl);
	*numerator = pl->events;

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
Commit	Line	Data
145ca25e PZ	1	/*
	2	* Floating proportions
	3	*
	4	* Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
	5	*
	6	* Description:
	7	*
	8	* The floating proportion is a time derivative with an exponentially decaying
	9	* history:
	10	*
	11	* p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
	12	*
	13	* Where j is an element from {prop_local}, x_{j} is j's number of events,
	14	* and i the time period over which the differential is taken. So d/dt_{-i} is
	15	* the differential over the i-th last period.
	16	*
	17	* The decaying history gives smooth transitions. The time differential carries
	18	* the notion of speed.
	19	*
	20	* The denominator is 2^(1+i) because we want the series to be normalised, ie.
	21	*
	22	* \Sum_{i=0} 1/2^(1+i) = 1
	23	*
	24	* Further more, if we measure time (t) in the same events as x; so that:
	25	*
	26	* t = \Sum_{j} x_{j}
	27	*
	28	* we get that:
	29	*
	30	* \Sum_{j} p_{j} = 1
	31	*
	32	* Writing this in an iterative fashion we get (dropping the 'd's):
	33	*
	34	* if (++x_{j}, ++t > period)
	35	* t /= 2;
	36	* for_each (j)
	37	* x_{j} /= 2;
	38	*
	39	* so that:
	40	*
	41	* p_{j} = x_{j} / t;
	42	*
	43	* We optimize away the '/= 2' for the global time delta by noting that:
	44	*
	45	* if (++t > period) t /= 2:
	46	*
	47	* Can be approximated by:
	48	*
	49	* period/2 + (++t % period/2)
	50	*
	51	* [ Furthermore, when we choose period to be 2^n it can be written in terms of
	52	* binary operations and wraparound artefacts disappear. ]
	53	*
	54	* Also note that this yields a natural counter of the elapsed periods:
	55	*
	56	* c = t / (period/2)
	57	*
	58	* [ Its monotonic increasing property can be applied to mitigate the wrap-
	59	* around issue. ]
	60	*
	61	* This allows us to do away with the loop over all prop_locals on each period
	62	* expiration. By remembering the period count under which it was last accessed
	63	* as c_{j}, we can obtain the number of 'missed' cycles from:
	64	*
65	* c - c_{j}
66	*
67	* We can then lazily catch up to the global period count every time we are
68	* going to use x_{j}, by doing:
69	*
70	* x_{j} /= 2^(c - c_{j}), c_{j} = c
71	*/
72
73	#include <linux/proportions.h>
74	#include <linux/rcupdate.h>
75
145ca25e PZ	76	int prop_descriptor_init(struct prop_descriptor *pd, int shift)
	77	{
	78	int err;
	79
	80	if (shift > PROP_MAX_SHIFT)
	81	shift = PROP_MAX_SHIFT;
	82
	83	pd->index = 0;
	84	pd->pg[0].shift = shift;
	85	mutex_init(&pd->mutex);
ea319518	86	err = percpu_counter_init(&pd->pg[0].events, 0);
145ca25e PZ	87	if (err)
	88	goto out;
	89
ea319518	90	err = percpu_counter_init(&pd->pg[1].events, 0);
145ca25e PZ	91	if (err)
	92	percpu_counter_destroy(&pd->pg[0].events);
	93
	94	out:
	95	return err;
	96	}
	97
	98	/*
	99	* We have two copies, and flip between them to make it seem like an atomic
	100	* update. The update is not really atomic wrt the events counter, but
	101	* it is internally consistent with the bit layout depending on shift.
	102	*
	103	* We copy the events count, move the bits around and flip the index.
	104	*/
	105	void prop_change_shift(struct prop_descriptor *pd, int shift)
	106	{
	107	int index;
	108	int offset;
	109	u64 events;
	110	unsigned long flags;
	111
	112	if (shift > PROP_MAX_SHIFT)
	113	shift = PROP_MAX_SHIFT;
	114
	115	mutex_lock(&pd->mutex);
	116
	117	index = pd->index ^ 1;
	118	offset = pd->pg[pd->index].shift - shift;
	119	if (!offset)
	120	goto out;
	121
	122	pd->pg[index].shift = shift;
	123
	124	local_irq_save(flags);
	125	events = percpu_counter_sum(&pd->pg[pd->index].events);
	126	if (offset < 0)
	127	events <<= -offset;
	128	else
	129	events >>= offset;
	130	percpu_counter_set(&pd->pg[index].events, events);
	131
	132	/*
	133	* ensure the new pg is fully written before the switch
	134	*/
	135	smp_wmb();
	136	pd->index = index;
	137	local_irq_restore(flags);
	138
	139	synchronize_rcu();
	140
	141	out:
	142	mutex_unlock(&pd->mutex);
	143	}
	144
	145	/*
	146	* wrap the access to the data in an rcu_read_lock() section;
	147	* this is used to track the active references.
	148	*/
	149	static struct prop_global prop_get_global(struct prop_descriptor pd)
30079677	150	__acquires(RCU)
145ca25e PZ	151	{
	152	int index;
	153
	154	rcu_read_lock();
	155	index = pd->index;
	156	/*
	157	* match the wmb from vcd_flip()
	158	*/
	159	smp_rmb();
	160	return &pd->pg[index];
	161	}
	162
	163	static void prop_put_global(struct prop_descriptor pd, struct prop_global pg)
30079677	164	__releases(RCU)
145ca25e PZ	165	{
	166	rcu_read_unlock();
	167	}
	168
	169	static void
	170	prop_adjust_shift(int pl_shift, unsigned long pl_period, int new_shift)
	171	{
	172	int offset = *pl_shift - new_shift;
	173
	174	if (!offset)
	175	return;
	176
	177	if (offset < 0)
	178	*pl_period <<= -offset;
	179	else
	180	*pl_period >>= offset;
	181
	182	*pl_shift = new_shift;
	183	}
	184
	185	/*
	186	* PERCPU
	187	*/
	188
f16b34aa PZ	189	#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))
f16b34aa PZ	190
145ca25e PZ	191	int prop_local_init_percpu(struct prop_local_percpu *pl)
	192	{
	193	spin_lock_init(&pl->lock);
	194	pl->shift = 0;
	195	pl->period = 0;
ea319518	196	return percpu_counter_init(&pl->events, 0);
145ca25e PZ	197	}
	198
	199	void prop_local_destroy_percpu(struct prop_local_percpu *pl)
	200	{
	201	percpu_counter_destroy(&pl->events);
	202	}
	203
	204	/*
	205	* Catch up with missed period expirations.
	206	*
	207	* until (c_{j} == c)
	208	* x_{j} -= x_{j}/2;
	209	* c_{j}++;
	210	*/
	211	static
	212	void prop_norm_percpu(struct prop_global pg, struct prop_local_percpu pl)
	213	{
	214	unsigned long period = 1UL << (pg->shift - 1);
	215	unsigned long period_mask = ~(period - 1);
	216	unsigned long global_period;
	217	unsigned long flags;
	218
	219	global_period = percpu_counter_read(&pg->events);
	220	global_period &= period_mask;
	221
	222	/*
	223	* Fast path - check if the local and global period count still match
	224	* outside of the lock.
	225	*/
	226	if (pl->period == global_period)
	227	return;
	228
	229	spin_lock_irqsave(&pl->lock, flags);
	230	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
f16b34aa	231
145ca25e PZ	232	/*
	233	* For each missed period, we half the local counter.
	234	* basically:
	235	* pl->events >> (global_period - pl->period);
145ca25e	236	*/
f16b34aa PZ	237	period = (global_period - pl->period) >> (pg->shift - 1);
	238	if (period < BITS_PER_LONG) {
	239	s64 val = percpu_counter_read(&pl->events);
	240
	241	if (val < (nr_cpu_ids * PROP_BATCH))
	242	val = percpu_counter_sum(&pl->events);
	243
	244	__percpu_counter_add(&pl->events, -val + (val >> period),
	245	PROP_BATCH);
	246	} else
	247	percpu_counter_set(&pl->events, 0);
	248
145ca25e PZ	249	pl->period = global_period;
	250	spin_unlock_irqrestore(&pl->lock, flags);
	251	}
	252
	253	/*
	254	* ++x_{j}, ++t
	255	*/
	256	void __prop_inc_percpu(struct prop_descriptor pd, struct prop_local_percpu pl)
	257	{
	258	struct prop_global *pg = prop_get_global(pd);
	259
	260	prop_norm_percpu(pg, pl);
f16b34aa	261	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
145ca25e PZ	262	percpu_counter_add(&pg->events, 1);
	263	prop_put_global(pd, pg);
	264	}
	265
a42dde04 PZ	266	/*
	267	* identical to __prop_inc_percpu, except that it limits this pl's fraction to
	268	* @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
	269	*/
	270	void __prop_inc_percpu_max(struct prop_descriptor *pd,
	271	struct prop_local_percpu *pl, long frac)
	272	{
	273	struct prop_global *pg = prop_get_global(pd);
	274
	275	prop_norm_percpu(pg, pl);
	276
	277	if (unlikely(frac != PROP_FRAC_BASE)) {
	278	unsigned long period_2 = 1UL << (pg->shift - 1);
	279	unsigned long counter_mask = period_2 - 1;
	280	unsigned long global_count;
	281	long numerator, denominator;
	282
	283	numerator = percpu_counter_read_positive(&pl->events);
	284	global_count = percpu_counter_read(&pg->events);
	285	denominator = period_2 + (global_count & counter_mask);
	286
	287	if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
	288	goto out_put;
	289	}
	290
	291	percpu_counter_add(&pl->events, 1);
	292	percpu_counter_add(&pg->events, 1);
	293
	294	out_put:
	295	prop_put_global(pd, pg);
	296	}
	297
145ca25e PZ	298	/*
	299	* Obtain a fraction of this proportion
	300	*
	301	* p_{j} = x_{j} / (period/2 + t % period/2)
	302	*/
	303	void prop_fraction_percpu(struct prop_descriptor *pd,
	304	struct prop_local_percpu *pl,
	305	long numerator, long denominator)
	306	{
	307	struct prop_global *pg = prop_get_global(pd);
	308	unsigned long period_2 = 1UL << (pg->shift - 1);
	309	unsigned long counter_mask = period_2 - 1;
	310	unsigned long global_count;
	311
	312	prop_norm_percpu(pg, pl);
	313	*numerator = percpu_counter_read_positive(&pl->events);
	314
	315	global_count = percpu_counter_read(&pg->events);
	316	*denominator = period_2 + (global_count & counter_mask);
	317
	318	prop_put_global(pd, pg);
	319	}
	320
	321	/*
	322	* SINGLE
	323	*/
	324
	325	int prop_local_init_single(struct prop_local_single *pl)
	326	{
	327	spin_lock_init(&pl->lock);
	328	pl->shift = 0;
	329	pl->period = 0;
	330	pl->events = 0;
	331	return 0;
	332	}
	333
	334	void prop_local_destroy_single(struct prop_local_single *pl)
	335	{
	336	}
	337
	338	/*
	339	* Catch up with missed period expirations.
	340	*/
	341	static
	342	void prop_norm_single(struct prop_global pg, struct prop_local_single pl)
	343	{
	344	unsigned long period = 1UL << (pg->shift - 1);
	345	unsigned long period_mask = ~(period - 1);
	346	unsigned long global_period;
	347	unsigned long flags;
	348
	349	global_period = percpu_counter_read(&pg->events);
	350	global_period &= period_mask;
	351
	352	/*
	353	* Fast path - check if the local and global period count still match
	354	* outside of the lock.
	355	*/
	356	if (pl->period == global_period)
	357	return;
	358
	359	spin_lock_irqsave(&pl->lock, flags);
	360	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	361	/*
362	* For each missed period, we half the local counter.
363	*/
364	period = (global_period - pl->period) >> (pg->shift - 1);
365	if (likely(period < BITS_PER_LONG))
366	pl->events >>= period;
367	else
368	pl->events = 0;
369	pl->period = global_period;
370	spin_unlock_irqrestore(&pl->lock, flags);
371	}
372
373	/*
374	* ++x_{j}, ++t
375	*/
376	void __prop_inc_single(struct prop_descriptor pd, struct prop_local_single pl)
377	{
378	struct prop_global *pg = prop_get_global(pd);
379
380	prop_norm_single(pg, pl);
381	pl->events++;
382	percpu_counter_add(&pg->events, 1);
383	prop_put_global(pd, pg);
384	}
385
386	/*
387	* Obtain a fraction of this proportion
388	*
389	* p_{j} = x_{j} / (period/2 + t % period/2)
390	*/
391	void prop_fraction_single(struct prop_descriptor *pd,
392	struct prop_local_single *pl,
393	long numerator, long denominator)
394	{
395	struct prop_global *pg = prop_get_global(pd);
396	unsigned long period_2 = 1UL << (pg->shift - 1);
397	unsigned long counter_mask = period_2 - 1;
398	unsigned long global_count;
399
400	prop_norm_single(pg, pl);
401	*numerator = pl->events;
402
403	global_count = percpu_counter_read(&pg->events);
404	*denominator = period_2 + (global_count & counter_mask);
405
406	prop_put_global(pd, pg);
407	}