/*
 * Floating proportions
 *
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 *
 * Description:
 *
 * The floating proportion is a time derivative with an exponentially decaying
 * history:
 *
 *   p_{j} = \Sum_{i=0} (dx_{j}/dt_{-i}) / 2^(1+i)
 *
 * Where j is an element from {prop_local}, x_{j} is j's number of events,
 * and i the time period over which the differential is taken. So d/dt_{-i} is
 * the differential over the i-th last period.
 *
 * The decaying history gives smooth transitions. The time differential carries
 * the notion of speed.
 *
 * The denominator is 2^(1+i) because we want the series to be normalised, i.e.
 *
 *   \Sum_{i=0} 1/2^(1+i) = 1
 *
 * Furthermore, if we measure time (t) in the same events as x, so that:
 *
 *   t = \Sum_{j} x_{j}
 *
 * we get that:
 *
 *   \Sum_{j} p_{j} = 1
 *
 * Writing this in an iterative fashion we get (dropping the 'd's):
 *
 *   if (++x_{j}, ++t > period)
 *     t /= 2;
 *     for_each (j)
 *       x_{j} /= 2;
 *
 * so that:
 *
 *   p_{j} = x_{j} / t;
 *
 * We optimize away the '/= 2' for the global time delta by noting that:
 *
 *   if (++t > period) t /= 2;
 *
 * can be approximated by:
 *
 *   period/2 + (++t % period/2)
 *
 * [ Furthermore, when we choose period to be 2^n it can be written in terms of
 *   binary operations and wraparound artefacts disappear. ]
 *
 * Also note that this yields a natural counter of the elapsed periods:
 *
 *   c = t / (period/2)
 *
 * [ Its monotonically increasing property can be applied to mitigate the
 *   wrap-around issue. ]
 *
 * This allows us to do away with the loop over all prop_locals on each period
 * expiration. By remembering the period count under which it was last accessed
 * as c_{j}, we can obtain the number of 'missed' cycles from:
 *
 *   c - c_{j}
 *
 * We can then lazily catch up to the global period count every time we are
 * going to use x_{j}, by doing:
 *
 *   x_{j} /= 2^(c - c_{j}), c_{j} = c
 */
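
/*
 * A rough worked example of the above, with numbers chosen purely for
 * illustration: take period = 16, so period/2 = 8. After t = 21 events the
 * elapsed period count is c = 21 / 8 = 2 and the denominator is
 * 8 + (21 % 8) = 13. A local counter last used at c_{j} = 0 with x_{j} = 12
 * has missed c - c_{j} = 2 periods, so it is halved twice on its next use:
 * x_{j} = 12 / 2^2 = 3, giving p_{j} = 3/13.
 */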

#include <linux/proportions.h>
#include <linux/rcupdate.h>

int prop_descriptor_init(struct prop_descriptor *pd, int shift)
{
	int err;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	pd->index = 0;
	pd->pg[0].shift = shift;
	mutex_init(&pd->mutex);
	err = percpu_counter_init(&pd->pg[0].events, 0);
	if (err)
		goto out;

	err = percpu_counter_init(&pd->pg[1].events, 0);
	if (err)
		percpu_counter_destroy(&pd->pg[0].events);

out:
	return err;
}

/*
 * We have two copies, and flip between them to make it seem like an atomic
 * update. The update is not really atomic wrt the events counter, but
 * it is internally consistent with the bit layout depending on shift.
 *
 * We copy the events count, move the bits around and flip the index.
 */
void prop_change_shift(struct prop_descriptor *pd, int shift)
{
	int index;
	int offset;
	u64 events;
	unsigned long flags;

	if (shift > PROP_MAX_SHIFT)
		shift = PROP_MAX_SHIFT;

	mutex_lock(&pd->mutex);

	index = pd->index ^ 1;
	offset = pd->pg[pd->index].shift - shift;
	if (!offset)
		goto out;

	pd->pg[index].shift = shift;

	local_irq_save(flags);
	events = percpu_counter_sum(&pd->pg[pd->index].events);
	if (offset < 0)
		events <<= -offset;
	else
		events >>= offset;
	percpu_counter_set(&pd->pg[index].events, events);

	/*
	 * ensure the new pg is fully written before the switch
	 */
	smp_wmb();
	pd->index = index;
	local_irq_restore(flags);

	synchronize_rcu();

out:
	mutex_unlock(&pd->mutex);
}
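
/*
 * Note that rescaling the copied event count by the shift difference keeps
 * the elapsed period count consistent across a shift change: since
 * c = t >> (shift - 1), shifting t by (old_shift - new_shift) while shift
 * changes leaves c (and hence every c - c_{j}) unchanged, up to the low
 * bits lost to truncation. prop_adjust_shift() below applies the same
 * rescaling to each local's remembered period.
 */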

/*
 * wrap the access to the data in an rcu_read_lock() section;
 * this is used to track the active references.
 */
static struct prop_global *prop_get_global(struct prop_descriptor *pd)
__acquires(RCU)
{
	int index;

	rcu_read_lock();
	index = pd->index;
	/*
	 * match the wmb from prop_change_shift()
	 */
	smp_rmb();
	return &pd->pg[index];
}

static void prop_put_global(struct prop_descriptor *pd, struct prop_global *pg)
__releases(RCU)
{
	rcu_read_unlock();
}

static void
prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift)
{
	int offset = *pl_shift - new_shift;

	if (!offset)
		return;

	if (offset < 0)
		*pl_period <<= -offset;
	else
		*pl_period >>= offset;

	*pl_shift = new_shift;
}

/*
 * PERCPU
 */
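
/*
 * Per-CPU updates to a local's event counter are batched; with this batch
 * size the cheap approximate read can lag the exact sum by up to roughly
 * nr_cpu_ids * PROP_BATCH, which is the threshold prop_norm_percpu() below
 * uses before falling back to an exact percpu_counter_sum().
 */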
#define PROP_BATCH (8*(1+ilog2(nr_cpu_ids)))

int prop_local_init_percpu(struct prop_local_percpu *pl)
{
	raw_spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	return percpu_counter_init(&pl->events, 0);
}

void prop_local_destroy_percpu(struct prop_local_percpu *pl)
{
	percpu_counter_destroy(&pl->events);
}

/*
 * Catch up with missed period expirations.
 *
 *   until (c_{j} == c)
 *     x_{j} -= x_{j}/2;
 *     c_{j}++;
 */
static
void prop_norm_percpu(struct prop_global *pg, struct prop_local_percpu *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	raw_spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);

	/*
	 * For each missed period, we halve the local counter,
	 * basically:
	 *   pl->events >>= (global_period - pl->period) >> (pg->shift - 1);
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (period < BITS_PER_LONG) {
		s64 val = percpu_counter_read(&pl->events);

		if (val < (nr_cpu_ids * PROP_BATCH))
			val = percpu_counter_sum(&pl->events);

		__percpu_counter_add(&pl->events, -val + (val >> period),
					PROP_BATCH);
	} else
		percpu_counter_set(&pl->events, 0);

	pl->period = global_period;
	raw_spin_unlock_irqrestore(&pl->lock, flags);
}
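
/*
 * A rough worked example of the catch-up above, with illustrative values:
 * with pg->shift == 4 the half-period is 1UL << 3 == 8 and
 * period_mask == ~7UL. A global event count of 21 then yields
 * global_period == 16, and a local last normalised at pl->period == 0 has
 * missed (16 - 0) >> 3 == 2 periods, so its event count is (approximately,
 * given the batched counter) halved twice before being used.
 */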

/*
 * ++x_{j}, ++t
 */
void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);
	__percpu_counter_add(&pl->events, 1, PROP_BATCH);
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * identical to __prop_inc_percpu, except that it limits this pl's fraction to
 * @frac/PROP_FRAC_BASE by ignoring events when this limit has been exceeded.
 */
void __prop_inc_percpu_max(struct prop_descriptor *pd,
			   struct prop_local_percpu *pl, long frac)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_percpu(pg, pl);

	if (unlikely(frac != PROP_FRAC_BASE)) {
		unsigned long period_2 = 1UL << (pg->shift - 1);
		unsigned long counter_mask = period_2 - 1;
		unsigned long global_count;
		long numerator, denominator;

		numerator = percpu_counter_read_positive(&pl->events);
		global_count = percpu_counter_read(&pg->events);
		denominator = period_2 + (global_count & counter_mask);

		if (numerator > ((denominator * frac) >> PROP_FRAC_SHIFT))
			goto out_put;
	}

	percpu_counter_add(&pl->events, 1);
	percpu_counter_add(&pg->events, 1);

out_put:
	prop_put_global(pd, pg);
}
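
/*
 * Rough illustration of the limit check above, using made-up numbers: if
 * PROP_FRAC_BASE were 1024 (i.e. PROP_FRAC_SHIFT == 10), then frac == 256
 * caps this local at about a quarter of the proportion; with a current
 * denominator of 13, further events are ignored once the local numerator
 * exceeds (13 * 256) >> 10 == 3.
 */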

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_percpu(struct prop_descriptor *pd,
			  struct prop_local_percpu *pl,
			  long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_percpu(pg, pl);
	*numerator = percpu_counter_read_positive(&pl->events);

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
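
/*
 * A minimal usage sketch, assuming a caller that owns one prop_descriptor
 * and one prop_local_percpu per tracked object (the my_* names are made up
 * for illustration, and error handling is omitted). With shift == 10 the
 * period is 1 << 10 events:
 *
 *	struct prop_descriptor my_pd;
 *	struct prop_local_percpu my_local;
 *	long num, den;
 *
 *	prop_descriptor_init(&my_pd, 10);
 *	prop_local_init_percpu(&my_local);
 *
 *	__prop_inc_percpu(&my_pd, &my_local);
 *	prop_fraction_percpu(&my_pd, &my_local, &num, &den);
 *
 * after which num/den approximates my_local's share of recent events.
 */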

/*
 * SINGLE
 */

int prop_local_init_single(struct prop_local_single *pl)
{
	raw_spin_lock_init(&pl->lock);
	pl->shift = 0;
	pl->period = 0;
	pl->events = 0;
	return 0;
}

void prop_local_destroy_single(struct prop_local_single *pl)
{
}

/*
 * Catch up with missed period expirations.
 */
static
void prop_norm_single(struct prop_global *pg, struct prop_local_single *pl)
{
	unsigned long period = 1UL << (pg->shift - 1);
	unsigned long period_mask = ~(period - 1);
	unsigned long global_period;
	unsigned long flags;

	global_period = percpu_counter_read(&pg->events);
	global_period &= period_mask;

	/*
	 * Fast path - check if the local and global period count still match
	 * outside of the lock.
	 */
	if (pl->period == global_period)
		return;

	raw_spin_lock_irqsave(&pl->lock, flags);
	prop_adjust_shift(&pl->shift, &pl->period, pg->shift);
	/*
	 * For each missed period, we halve the local counter.
	 */
	period = (global_period - pl->period) >> (pg->shift - 1);
	if (likely(period < BITS_PER_LONG))
		pl->events >>= period;
	else
		pl->events = 0;
	pl->period = global_period;
	raw_spin_unlock_irqrestore(&pl->lock, flags);
}

/*
 * ++x_{j}, ++t
 */
void __prop_inc_single(struct prop_descriptor *pd, struct prop_local_single *pl)
{
	struct prop_global *pg = prop_get_global(pd);

	prop_norm_single(pg, pl);
	pl->events++;
	percpu_counter_add(&pg->events, 1);
	prop_put_global(pd, pg);
}

/*
 * Obtain a fraction of this proportion
 *
 *   p_{j} = x_{j} / (period/2 + t % period/2)
 */
void prop_fraction_single(struct prop_descriptor *pd,
			  struct prop_local_single *pl,
			  long *numerator, long *denominator)
{
	struct prop_global *pg = prop_get_global(pd);
	unsigned long period_2 = 1UL << (pg->shift - 1);
	unsigned long counter_mask = period_2 - 1;
	unsigned long global_count;

	prop_norm_single(pg, pl);
	*numerator = pl->events;

	global_count = percpu_counter_read(&pg->events);
	*denominator = period_2 + (global_count & counter_mask);

	prop_put_global(pd, pg);
}
407 | } |