Commit | Line | Data |
---|---|---|
08d43a5f TZ |
1 | /* |
2 | * tracing_map - lock-free map for tracing | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * Copyright (C) 2015 Tom Zanussi <tom.zanussi@linux.intel.com> | |
15 | * | |
16 | * tracing_map implementation inspired by lock-free map algorithms | |
17 | * originated by Dr. Cliff Click: | |
18 | * | |
19 | * http://www.azulsystems.com/blog/cliff/2007-03-26-non-blocking-hashtable | |
20 | * http://www.azulsystems.com/events/javaone_2007/2007_LockFreeHash.pdf | |
21 | */ | |
22 | ||
23 | #include <linux/vmalloc.h> | |
24 | #include <linux/jhash.h> | |
25 | #include <linux/slab.h> | |
26 | #include <linux/sort.h> | |
27 | ||
28 | #include "tracing_map.h" | |
29 | #include "trace.h" | |
30 | ||
31 | /* | |
32 | * NOTE: For a detailed description of the data structures used by | |
33 | * these functions (such as tracing_map_elt) please see the overview | |
34 | * of tracing_map data structures at the beginning of tracing_map.h. | |
35 | */ | |
36 | ||
37 | /** | |
38 | * tracing_map_update_sum - Add a value to a tracing_map_elt's sum field | |
39 | * @elt: The tracing_map_elt | |
40 | * @i: The index of the given sum associated with the tracing_map_elt | |
41 | * @n: The value to add to the sum | |
42 | * | |
43 | * Add n to sum i associated with the specified tracing_map_elt | |
44 | * instance. The index i is the index returned by the call to | |
45 | * tracing_map_add_sum_field() when the tracing map was set up. | |
46 | */ | |
47 | void tracing_map_update_sum(struct tracing_map_elt *elt, unsigned int i, u64 n) | |
48 | { | |
49 | atomic64_add(n, &elt->fields[i].sum); | |
50 | } | |
51 | ||
52 | /** | |
53 | * tracing_map_read_sum - Return the value of a tracing_map_elt's sum field | |
54 | * @elt: The tracing_map_elt | |
55 | * @i: The index of the given sum associated with the tracing_map_elt | |
56 | * | |
57 | * Retrieve the value of the sum i associated with the specified | |
58 | * tracing_map_elt instance. The index i is the index returned by the | |
59 | * call to tracing_map_add_sum_field() when the tracing map was set | |
60 | * up. | |
61 | * | |
62 | * Return: The sum associated with field i for elt. | |
63 | */ | |
64 | u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i) | |
65 | { | |
66 | return (u64)atomic64_read(&elt->fields[i].sum); | |
67 | } | |
68 | ||
2734b629 TZ |
69 | /** |
70 | * tracing_map_set_var - Assign a tracing_map_elt's variable field | |
71 | * @elt: The tracing_map_elt | |
72 | * @i: The index of the given variable associated with the tracing_map_elt | |
73 | * @n: The value to assign | |
74 | * | |
75 | * Assign n to variable i associated with the specified tracing_map_elt | |
76 | * instance. The index i is the index returned by the call to | |
77 | * tracing_map_add_var() when the tracing map was set up. | |
78 | */ | |
79 | void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n) | |
80 | { | |
81 | atomic64_set(&elt->vars[i], n); | |
82 | elt->var_set[i] = true; | |
83 | } | |
84 | ||
85 | /** | |
86 | * tracing_map_var_set - Return whether or not a variable has been set | |
87 | * @elt: The tracing_map_elt | |
88 | * @i: The index of the given variable associated with the tracing_map_elt | |
89 | * | |
90 | * Return true if the variable has been set, false otherwise. The | |
91 | * index i is the index returned by the call to tracing_map_add_var() | |
92 | * when the tracing map was set up. | |
93 | */ | |
94 | bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i) | |
95 | { | |
96 | return elt->var_set[i]; | |
97 | } | |
98 | ||
99 | /** | |
100 | * tracing_map_read_var - Return the value of a tracing_map_elt's variable field | |
101 | * @elt: The tracing_map_elt | |
102 | * @i: The index of the given variable associated with the tracing_map_elt | |
103 | * | |
104 | * Retrieve the value of the variable i associated with the specified | |
105 | * tracing_map_elt instance. The index i is the index returned by the | |
106 | * call to tracing_map_add_var() when the tracing map was set | |
107 | * up. | |
108 | * | |
109 | * Return: The variable value associated with field i for elt. | |
110 | */ | |
111 | u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i) | |
112 | { | |
113 | return (u64)atomic64_read(&elt->vars[i]); | |
114 | } | |
115 | ||
116 | /** | |
117 | * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field | |
118 | * @elt: The tracing_map_elt | |
119 | * @i: The index of the given variable associated with the tracing_map_elt | |
120 | * | |
121 | * Retrieve the value of the variable i associated with the specified | |
122 | * tracing_map_elt instance, and reset the variable to the 'not set' | |
123 | * state. The index i is the index returned by the call to | |
124 | * tracing_map_add_var() when the tracing map was set up. The reset | |
125 | * essentially makes the variable a read-once variable if it's only | |
126 | * accessed using this function. | |
127 | * | |
128 | * Return: The variable value associated with field i for elt. | |
129 | */ | |
130 | u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i) | |
131 | { | |
132 | elt->var_set[i] = false; | |
133 | return (u64)atomic64_read(&elt->vars[i]); | |
134 | } | |
135 | ||
08d43a5f TZ |
136 | int tracing_map_cmp_string(void *val_a, void *val_b) |
137 | { | |
138 | char *a = val_a; | |
139 | char *b = val_b; | |
140 | ||
141 | return strcmp(a, b); | |
142 | } | |
143 | ||
144 | int tracing_map_cmp_none(void *val_a, void *val_b) | |
145 | { | |
146 | return 0; | |
147 | } | |
148 | ||
149 | static int tracing_map_cmp_atomic64(void *val_a, void *val_b) | |
150 | { | |
151 | u64 a = atomic64_read((atomic64_t *)val_a); | |
152 | u64 b = atomic64_read((atomic64_t *)val_b); | |
153 | ||
154 | return (a > b) ? 1 : ((a < b) ? -1 : 0); | |
155 | } | |
156 | ||
157 | #define DEFINE_TRACING_MAP_CMP_FN(type) \ | |
158 | static int tracing_map_cmp_##type(void *val_a, void *val_b) \ | |
159 | { \ | |
160 | type a = *(type *)val_a; \ | |
161 | type b = *(type *)val_b; \ | |
162 | \ | |
163 | return (a > b) ? 1 : ((a < b) ? -1 : 0); \ | |
164 | } | |
165 | ||
166 | DEFINE_TRACING_MAP_CMP_FN(s64); | |
167 | DEFINE_TRACING_MAP_CMP_FN(u64); | |
168 | DEFINE_TRACING_MAP_CMP_FN(s32); | |
169 | DEFINE_TRACING_MAP_CMP_FN(u32); | |
170 | DEFINE_TRACING_MAP_CMP_FN(s16); | |
171 | DEFINE_TRACING_MAP_CMP_FN(u16); | |
172 | DEFINE_TRACING_MAP_CMP_FN(s8); | |
173 | DEFINE_TRACING_MAP_CMP_FN(u8); | |
174 | ||
175 | tracing_map_cmp_fn_t tracing_map_cmp_num(int field_size, | |
176 | int field_is_signed) | |
177 | { | |
178 | tracing_map_cmp_fn_t fn = tracing_map_cmp_none; | |
179 | ||
180 | switch (field_size) { | |
181 | case 8: | |
182 | if (field_is_signed) | |
183 | fn = tracing_map_cmp_s64; | |
184 | else | |
185 | fn = tracing_map_cmp_u64; | |
186 | break; | |
187 | case 4: | |
188 | if (field_is_signed) | |
189 | fn = tracing_map_cmp_s32; | |
190 | else | |
191 | fn = tracing_map_cmp_u32; | |
192 | break; | |
193 | case 2: | |
194 | if (field_is_signed) | |
195 | fn = tracing_map_cmp_s16; | |
196 | else | |
197 | fn = tracing_map_cmp_u16; | |
198 | break; | |
199 | case 1: | |
200 | if (field_is_signed) | |
201 | fn = tracing_map_cmp_s8; | |
202 | else | |
203 | fn = tracing_map_cmp_u8; | |
204 | break; | |
205 | } | |
206 | ||
207 | return fn; | |
208 | } | |
209 | ||
210 | static int tracing_map_add_field(struct tracing_map *map, | |
211 | tracing_map_cmp_fn_t cmp_fn) | |
212 | { | |
213 | int ret = -EINVAL; | |
214 | ||
215 | if (map->n_fields < TRACING_MAP_FIELDS_MAX) { | |
216 | ret = map->n_fields; | |
217 | map->fields[map->n_fields++].cmp_fn = cmp_fn; | |
218 | } | |
219 | ||
220 | return ret; | |
221 | } | |
222 | ||
223 | /** | |
224 | * tracing_map_add_sum_field - Add a field describing a tracing_map sum | |
225 | * @map: The tracing_map | |
226 | * | |
227 | * Add a sum field to the key and return the index identifying it in | |
228 | * the map and associated tracing_map_elts. This is the index used | |
229 | * for instance to update a sum for a particular tracing_map_elt using | |
230 | * tracing_map_update_sum() or reading it via tracing_map_read_sum(). | |
231 | * | |
232 | * Return: The index identifying the field in the map and associated | |
3b772b96 | 233 | * tracing_map_elts, or -EINVAL on error. |
08d43a5f TZ |
234 | */ |
235 | int tracing_map_add_sum_field(struct tracing_map *map) | |
236 | { | |
237 | return tracing_map_add_field(map, tracing_map_cmp_atomic64); | |
238 | } | |
239 | ||
2734b629 TZ |
240 | /** |
241 | * tracing_map_add_var - Add a field describing a tracing_map var | |
242 | * @map: The tracing_map | |
243 | * | |
244 | * Add a var to the map and return the index identifying it in the map | |
245 | * and associated tracing_map_elts. This is the index used for | |
246 | * instance to update a var for a particular tracing_map_elt using | |
247 | * tracing_map_update_var() or reading it via tracing_map_read_var(). | |
248 | * | |
249 | * Return: The index identifying the var in the map and associated | |
250 | * tracing_map_elts, or -EINVAL on error. | |
251 | */ | |
252 | int tracing_map_add_var(struct tracing_map *map) | |
253 | { | |
254 | int ret = -EINVAL; | |
255 | ||
256 | if (map->n_vars < TRACING_MAP_VARS_MAX) | |
257 | ret = map->n_vars++; | |
258 | ||
259 | return ret; | |
260 | } | |
261 | ||
08d43a5f TZ |
262 | /** |
263 | * tracing_map_add_key_field - Add a field describing a tracing_map key | |
264 | * @map: The tracing_map | |
265 | * @offset: The offset within the key | |
266 | * @cmp_fn: The comparison function that will be used to sort on the key | |
267 | * | |
268 | * Let the map know there is a key and that if it's used as a sort key | |
269 | * to use cmp_fn. | |
270 | * | |
271 | * A key can be a subset of a compound key; for that purpose, the | |
272 | * offset param is used to describe where within the the compound key | |
273 | * the key referenced by this key field resides. | |
274 | * | |
275 | * Return: The index identifying the field in the map and associated | |
3b772b96 | 276 | * tracing_map_elts, or -EINVAL on error. |
08d43a5f TZ |
277 | */ |
278 | int tracing_map_add_key_field(struct tracing_map *map, | |
279 | unsigned int offset, | |
280 | tracing_map_cmp_fn_t cmp_fn) | |
281 | ||
282 | { | |
283 | int idx = tracing_map_add_field(map, cmp_fn); | |
284 | ||
285 | if (idx < 0) | |
286 | return idx; | |
287 | ||
288 | map->fields[idx].offset = offset; | |
289 | ||
290 | map->key_idx[map->n_keys++] = idx; | |
291 | ||
292 | return idx; | |
293 | } | |
294 | ||
295 | void tracing_map_array_clear(struct tracing_map_array *a) | |
296 | { | |
297 | unsigned int i; | |
298 | ||
299 | if (!a->pages) | |
300 | return; | |
301 | ||
302 | for (i = 0; i < a->n_pages; i++) | |
303 | memset(a->pages[i], 0, PAGE_SIZE); | |
304 | } | |
305 | ||
306 | void tracing_map_array_free(struct tracing_map_array *a) | |
307 | { | |
308 | unsigned int i; | |
309 | ||
310 | if (!a) | |
311 | return; | |
312 | ||
475bb3c6 CH |
313 | if (!a->pages) |
314 | goto free; | |
08d43a5f TZ |
315 | |
316 | for (i = 0; i < a->n_pages; i++) { | |
317 | if (!a->pages[i]) | |
318 | break; | |
319 | free_page((unsigned long)a->pages[i]); | |
320 | } | |
475bb3c6 CH |
321 | |
322 | kfree(a->pages); | |
323 | ||
324 | free: | |
325 | kfree(a); | |
08d43a5f TZ |
326 | } |
327 | ||
328 | struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, | |
329 | unsigned int entry_size) | |
330 | { | |
331 | struct tracing_map_array *a; | |
332 | unsigned int i; | |
333 | ||
334 | a = kzalloc(sizeof(*a), GFP_KERNEL); | |
335 | if (!a) | |
336 | return NULL; | |
337 | ||
338 | a->entry_size_shift = fls(roundup_pow_of_two(entry_size) - 1); | |
339 | a->entries_per_page = PAGE_SIZE / (1 << a->entry_size_shift); | |
340 | a->n_pages = n_elts / a->entries_per_page; | |
341 | if (!a->n_pages) | |
342 | a->n_pages = 1; | |
343 | a->entry_shift = fls(a->entries_per_page) - 1; | |
344 | a->entry_mask = (1 << a->entry_shift) - 1; | |
345 | ||
346 | a->pages = kcalloc(a->n_pages, sizeof(void *), GFP_KERNEL); | |
347 | if (!a->pages) | |
348 | goto free; | |
349 | ||
350 | for (i = 0; i < a->n_pages; i++) { | |
351 | a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL); | |
352 | if (!a->pages[i]) | |
353 | goto free; | |
354 | } | |
355 | out: | |
356 | return a; | |
357 | free: | |
358 | tracing_map_array_free(a); | |
359 | a = NULL; | |
360 | ||
361 | goto out; | |
362 | } | |
363 | ||
364 | static void tracing_map_elt_clear(struct tracing_map_elt *elt) | |
365 | { | |
366 | unsigned i; | |
367 | ||
368 | for (i = 0; i < elt->map->n_fields; i++) | |
369 | if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) | |
370 | atomic64_set(&elt->fields[i].sum, 0); | |
371 | ||
2734b629 TZ |
372 | for (i = 0; i < elt->map->n_vars; i++) { |
373 | atomic64_set(&elt->vars[i], 0); | |
374 | elt->var_set[i] = false; | |
375 | } | |
376 | ||
08d43a5f TZ |
377 | if (elt->map->ops && elt->map->ops->elt_clear) |
378 | elt->map->ops->elt_clear(elt); | |
379 | } | |
380 | ||
381 | static void tracing_map_elt_init_fields(struct tracing_map_elt *elt) | |
382 | { | |
383 | unsigned int i; | |
384 | ||
385 | tracing_map_elt_clear(elt); | |
386 | ||
387 | for (i = 0; i < elt->map->n_fields; i++) { | |
388 | elt->fields[i].cmp_fn = elt->map->fields[i].cmp_fn; | |
389 | ||
390 | if (elt->fields[i].cmp_fn != tracing_map_cmp_atomic64) | |
391 | elt->fields[i].offset = elt->map->fields[i].offset; | |
392 | } | |
393 | } | |
394 | ||
395 | static void tracing_map_elt_free(struct tracing_map_elt *elt) | |
396 | { | |
397 | if (!elt) | |
398 | return; | |
399 | ||
400 | if (elt->map->ops && elt->map->ops->elt_free) | |
401 | elt->map->ops->elt_free(elt); | |
402 | kfree(elt->fields); | |
2734b629 TZ |
403 | kfree(elt->vars); |
404 | kfree(elt->var_set); | |
08d43a5f TZ |
405 | kfree(elt->key); |
406 | kfree(elt); | |
407 | } | |
408 | ||
409 | static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) | |
410 | { | |
411 | struct tracing_map_elt *elt; | |
412 | int err = 0; | |
413 | ||
414 | elt = kzalloc(sizeof(*elt), GFP_KERNEL); | |
415 | if (!elt) | |
416 | return ERR_PTR(-ENOMEM); | |
417 | ||
418 | elt->map = map; | |
419 | ||
420 | elt->key = kzalloc(map->key_size, GFP_KERNEL); | |
421 | if (!elt->key) { | |
422 | err = -ENOMEM; | |
423 | goto free; | |
424 | } | |
425 | ||
426 | elt->fields = kcalloc(map->n_fields, sizeof(*elt->fields), GFP_KERNEL); | |
427 | if (!elt->fields) { | |
428 | err = -ENOMEM; | |
429 | goto free; | |
430 | } | |
431 | ||
2734b629 TZ |
432 | elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL); |
433 | if (!elt->vars) { | |
434 | err = -ENOMEM; | |
435 | goto free; | |
436 | } | |
437 | ||
438 | elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL); | |
439 | if (!elt->var_set) { | |
440 | err = -ENOMEM; | |
441 | goto free; | |
442 | } | |
443 | ||
08d43a5f TZ |
444 | tracing_map_elt_init_fields(elt); |
445 | ||
446 | if (map->ops && map->ops->elt_alloc) { | |
447 | err = map->ops->elt_alloc(elt); | |
448 | if (err) | |
449 | goto free; | |
450 | } | |
451 | return elt; | |
452 | free: | |
453 | tracing_map_elt_free(elt); | |
454 | ||
455 | return ERR_PTR(err); | |
456 | } | |
457 | ||
458 | static struct tracing_map_elt *get_free_elt(struct tracing_map *map) | |
459 | { | |
460 | struct tracing_map_elt *elt = NULL; | |
461 | int idx; | |
462 | ||
463 | idx = atomic_inc_return(&map->next_elt); | |
464 | if (idx < map->max_elts) { | |
465 | elt = *(TRACING_MAP_ELT(map->elts, idx)); | |
466 | if (map->ops && map->ops->elt_init) | |
467 | map->ops->elt_init(elt); | |
468 | } | |
469 | ||
470 | return elt; | |
471 | } | |
472 | ||
473 | static void tracing_map_free_elts(struct tracing_map *map) | |
474 | { | |
475 | unsigned int i; | |
476 | ||
477 | if (!map->elts) | |
478 | return; | |
479 | ||
6e4cf657 | 480 | for (i = 0; i < map->max_elts; i++) { |
08d43a5f | 481 | tracing_map_elt_free(*(TRACING_MAP_ELT(map->elts, i))); |
6e4cf657 TZ |
482 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; |
483 | } | |
08d43a5f TZ |
484 | |
485 | tracing_map_array_free(map->elts); | |
6e4cf657 | 486 | map->elts = NULL; |
08d43a5f TZ |
487 | } |
488 | ||
489 | static int tracing_map_alloc_elts(struct tracing_map *map) | |
490 | { | |
491 | unsigned int i; | |
492 | ||
493 | map->elts = tracing_map_array_alloc(map->max_elts, | |
494 | sizeof(struct tracing_map_elt *)); | |
495 | if (!map->elts) | |
496 | return -ENOMEM; | |
497 | ||
498 | for (i = 0; i < map->max_elts; i++) { | |
499 | *(TRACING_MAP_ELT(map->elts, i)) = tracing_map_elt_alloc(map); | |
6e4cf657 TZ |
500 | if (IS_ERR(*(TRACING_MAP_ELT(map->elts, i)))) { |
501 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; | |
08d43a5f TZ |
502 | tracing_map_free_elts(map); |
503 | ||
504 | return -ENOMEM; | |
505 | } | |
506 | } | |
507 | ||
508 | return 0; | |
509 | } | |
510 | ||
511 | static inline bool keys_match(void *key, void *test_key, unsigned key_size) | |
512 | { | |
513 | bool match = true; | |
514 | ||
515 | if (memcmp(key, test_key, key_size)) | |
516 | match = false; | |
517 | ||
518 | return match; | |
519 | } | |
520 | ||
521 | static inline struct tracing_map_elt * | |
522 | __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) | |
523 | { | |
524 | u32 idx, key_hash, test_key; | |
cbf4100e | 525 | int dup_try = 0; |
08d43a5f | 526 | struct tracing_map_entry *entry; |
cbf4100e | 527 | struct tracing_map_elt *val; |
08d43a5f TZ |
528 | |
529 | key_hash = jhash(key, map->key_size, 0); | |
530 | if (key_hash == 0) | |
531 | key_hash = 1; | |
532 | idx = key_hash >> (32 - (map->map_bits + 1)); | |
533 | ||
534 | while (1) { | |
535 | idx &= (map->map_size - 1); | |
536 | entry = TRACING_MAP_ENTRY(map->map, idx); | |
537 | test_key = entry->key; | |
538 | ||
cbf4100e VP |
539 | if (test_key && test_key == key_hash) { |
540 | val = READ_ONCE(entry->val); | |
541 | if (val && | |
542 | keys_match(key, val->key, map->key_size)) { | |
543 | if (!lookup_only) | |
544 | atomic64_inc(&map->hits); | |
545 | return val; | |
546 | } else if (unlikely(!val)) { | |
547 | /* | |
548 | * The key is present. But, val (pointer to elt | |
549 | * struct) is still NULL. which means some other | |
550 | * thread is in the process of inserting an | |
551 | * element. | |
552 | * | |
553 | * On top of that, it's key_hash is same as the | |
554 | * one being inserted right now. So, it's | |
555 | * possible that the element has the same | |
556 | * key as well. | |
557 | */ | |
558 | ||
559 | dup_try++; | |
560 | if (dup_try > map->map_size) { | |
561 | atomic64_inc(&map->drops); | |
562 | break; | |
563 | } | |
564 | continue; | |
565 | } | |
08d43a5f TZ |
566 | } |
567 | ||
568 | if (!test_key) { | |
569 | if (lookup_only) | |
570 | break; | |
571 | ||
572 | if (!cmpxchg(&entry->key, 0, key_hash)) { | |
573 | struct tracing_map_elt *elt; | |
574 | ||
575 | elt = get_free_elt(map); | |
576 | if (!elt) { | |
577 | atomic64_inc(&map->drops); | |
578 | entry->key = 0; | |
579 | break; | |
580 | } | |
581 | ||
582 | memcpy(elt->key, key, map->key_size); | |
583 | entry->val = elt; | |
584 | atomic64_inc(&map->hits); | |
585 | ||
586 | return entry->val; | |
cbf4100e VP |
587 | } else { |
588 | /* | |
589 | * cmpxchg() failed. Loop around once | |
590 | * more to check what key was inserted. | |
591 | */ | |
592 | dup_try++; | |
593 | continue; | |
08d43a5f TZ |
594 | } |
595 | } | |
596 | ||
597 | idx++; | |
598 | } | |
599 | ||
600 | return NULL; | |
601 | } | |
602 | ||
603 | /** | |
604 | * tracing_map_insert - Insert key and/or retrieve val from a tracing_map | |
605 | * @map: The tracing_map to insert into | |
606 | * @key: The key to insert | |
607 | * | |
608 | * Inserts a key into a tracing_map and creates and returns a new | |
609 | * tracing_map_elt for it, or if the key has already been inserted by | |
610 | * a previous call, returns the tracing_map_elt already associated | |
611 | * with it. When the map was created, the number of elements to be | |
612 | * allocated for the map was specified (internally maintained as | |
613 | * 'max_elts' in struct tracing_map), and that number of | |
614 | * tracing_map_elts was created by tracing_map_init(). This is the | |
615 | * pre-allocated pool of tracing_map_elts that tracing_map_insert() | |
616 | * will allocate from when adding new keys. Once that pool is | |
617 | * exhausted, tracing_map_insert() is useless and will return NULL to | |
618 | * signal that state. There are two user-visible tracing_map | |
619 | * variables, 'hits' and 'drops', which are updated by this function. | |
620 | * Every time an element is either successfully inserted or retrieved, | |
621 | * the 'hits' value is incrememented. Every time an element insertion | |
622 | * fails, the 'drops' value is incremented. | |
623 | * | |
624 | * This is a lock-free tracing map insertion function implementing a | |
625 | * modified form of Cliff Click's basic insertion algorithm. It | |
626 | * requires the table size be a power of two. To prevent any | |
627 | * possibility of an infinite loop we always make the internal table | |
628 | * size double the size of the requested table size (max_elts * 2). | |
629 | * Likewise, we never reuse a slot or resize or delete elements - when | |
630 | * we've reached max_elts entries, we simply return NULL once we've | |
631 | * run out of entries. Readers can at any point in time traverse the | |
632 | * tracing map and safely access the key/val pairs. | |
633 | * | |
634 | * Return: the tracing_map_elt pointer val associated with the key. | |
635 | * If this was a newly inserted key, the val will be a newly allocated | |
636 | * and associated tracing_map_elt pointer val. If the key wasn't | |
637 | * found and the pool of tracing_map_elts has been exhausted, NULL is | |
638 | * returned and no further insertions will succeed. | |
639 | */ | |
640 | struct tracing_map_elt *tracing_map_insert(struct tracing_map *map, void *key) | |
641 | { | |
642 | return __tracing_map_insert(map, key, false); | |
643 | } | |
644 | ||
645 | /** | |
646 | * tracing_map_lookup - Retrieve val from a tracing_map | |
647 | * @map: The tracing_map to perform the lookup on | |
648 | * @key: The key to look up | |
649 | * | |
650 | * Looks up key in tracing_map and if found returns the matching | |
651 | * tracing_map_elt. This is a lock-free lookup; see | |
652 | * tracing_map_insert() for details on tracing_map and how it works. | |
653 | * Every time an element is retrieved, the 'hits' value is | |
654 | * incrememented. There is one user-visible tracing_map variable, | |
655 | * 'hits', which is updated by this function. Every time an element | |
656 | * is successfully retrieved, the 'hits' value is incrememented. The | |
657 | * 'drops' value is never updated by this function. | |
658 | * | |
659 | * Return: the tracing_map_elt pointer val associated with the key. | |
660 | * If the key wasn't found, NULL is returned. | |
661 | */ | |
662 | struct tracing_map_elt *tracing_map_lookup(struct tracing_map *map, void *key) | |
663 | { | |
664 | return __tracing_map_insert(map, key, true); | |
665 | } | |
666 | ||
667 | /** | |
668 | * tracing_map_destroy - Destroy a tracing_map | |
669 | * @map: The tracing_map to destroy | |
670 | * | |
671 | * Frees a tracing_map along with its associated array of | |
672 | * tracing_map_elts. | |
673 | * | |
674 | * Callers should make sure there are no readers or writers actively | |
675 | * reading or inserting into the map before calling this. | |
676 | */ | |
677 | void tracing_map_destroy(struct tracing_map *map) | |
678 | { | |
679 | if (!map) | |
680 | return; | |
681 | ||
682 | tracing_map_free_elts(map); | |
683 | ||
684 | tracing_map_array_free(map->map); | |
685 | kfree(map); | |
686 | } | |
687 | ||
688 | /** | |
689 | * tracing_map_clear - Clear a tracing_map | |
690 | * @map: The tracing_map to clear | |
691 | * | |
692 | * Resets the tracing map to a cleared or initial state. The | |
693 | * tracing_map_elts are all cleared, and the array of struct | |
694 | * tracing_map_entry is reset to an initialized state. | |
695 | * | |
696 | * Callers should make sure there are no writers actively inserting | |
697 | * into the map before calling this. | |
698 | */ | |
699 | void tracing_map_clear(struct tracing_map *map) | |
700 | { | |
701 | unsigned int i; | |
702 | ||
703 | atomic_set(&map->next_elt, -1); | |
704 | atomic64_set(&map->hits, 0); | |
705 | atomic64_set(&map->drops, 0); | |
706 | ||
707 | tracing_map_array_clear(map->map); | |
708 | ||
709 | for (i = 0; i < map->max_elts; i++) | |
710 | tracing_map_elt_clear(*(TRACING_MAP_ELT(map->elts, i))); | |
711 | } | |
712 | ||
713 | static void set_sort_key(struct tracing_map *map, | |
714 | struct tracing_map_sort_key *sort_key) | |
715 | { | |
716 | map->sort_key = *sort_key; | |
717 | } | |
718 | ||
719 | /** | |
720 | * tracing_map_create - Create a lock-free map and element pool | |
721 | * @map_bits: The size of the map (2 ** map_bits) | |
722 | * @key_size: The size of the key for the map in bytes | |
723 | * @ops: Optional client-defined tracing_map_ops instance | |
724 | * @private_data: Client data associated with the map | |
725 | * | |
726 | * Creates and sets up a map to contain 2 ** map_bits number of | |
727 | * elements (internally maintained as 'max_elts' in struct | |
728 | * tracing_map). Before using, map fields should be added to the map | |
729 | * with tracing_map_add_sum_field() and tracing_map_add_key_field(). | |
730 | * tracing_map_init() should then be called to allocate the array of | |
731 | * tracing_map_elts, in order to avoid allocating anything in the map | |
732 | * insertion path. The user-specified map size reflects the maximum | |
733 | * number of elements that can be contained in the table requested by | |
734 | * the user - internally we double that in order to keep the table | |
735 | * sparse and keep collisions manageable. | |
736 | * | |
737 | * A tracing_map is a special-purpose map designed to aggregate or | |
738 | * 'sum' one or more values associated with a specific object of type | |
739 | * tracing_map_elt, which is attached by the map to a given key. | |
740 | * | |
741 | * tracing_map_create() sets up the map itself, and provides | |
742 | * operations for inserting tracing_map_elts, but doesn't allocate the | |
743 | * tracing_map_elts themselves, or provide a means for describing the | |
744 | * keys or sums associated with the tracing_map_elts. All | |
745 | * tracing_map_elts for a given map have the same set of sums and | |
746 | * keys, which are defined by the client using the functions | |
747 | * tracing_map_add_key_field() and tracing_map_add_sum_field(). Once | |
748 | * the fields are defined, the pool of elements allocated for the map | |
749 | * can be created, which occurs when the client code calls | |
750 | * tracing_map_init(). | |
751 | * | |
752 | * When tracing_map_init() returns, tracing_map_elt elements can be | |
753 | * inserted into the map using tracing_map_insert(). When called, | |
754 | * tracing_map_insert() grabs a free tracing_map_elt from the pool, or | |
755 | * finds an existing match in the map and in either case returns it. | |
756 | * The client can then use tracing_map_update_sum() and | |
757 | * tracing_map_read_sum() to update or read a given sum field for the | |
758 | * tracing_map_elt. | |
759 | * | |
760 | * The client can at any point retrieve and traverse the current set | |
761 | * of inserted tracing_map_elts in a tracing_map, via | |
762 | * tracing_map_sort_entries(). Sorting can be done on any field, | |
763 | * including keys. | |
764 | * | |
765 | * See tracing_map.h for a description of tracing_map_ops. | |
766 | * | |
767 | * Return: the tracing_map pointer if successful, ERR_PTR if not. | |
768 | */ | |
769 | struct tracing_map *tracing_map_create(unsigned int map_bits, | |
770 | unsigned int key_size, | |
771 | const struct tracing_map_ops *ops, | |
772 | void *private_data) | |
773 | { | |
774 | struct tracing_map *map; | |
775 | unsigned int i; | |
776 | ||
777 | if (map_bits < TRACING_MAP_BITS_MIN || | |
778 | map_bits > TRACING_MAP_BITS_MAX) | |
779 | return ERR_PTR(-EINVAL); | |
780 | ||
781 | map = kzalloc(sizeof(*map), GFP_KERNEL); | |
782 | if (!map) | |
783 | return ERR_PTR(-ENOMEM); | |
784 | ||
785 | map->map_bits = map_bits; | |
786 | map->max_elts = (1 << map_bits); | |
787 | atomic_set(&map->next_elt, -1); | |
788 | ||
789 | map->map_size = (1 << (map_bits + 1)); | |
790 | map->ops = ops; | |
791 | ||
792 | map->private_data = private_data; | |
793 | ||
794 | map->map = tracing_map_array_alloc(map->map_size, | |
795 | sizeof(struct tracing_map_entry)); | |
796 | if (!map->map) | |
797 | goto free; | |
798 | ||
799 | map->key_size = key_size; | |
800 | for (i = 0; i < TRACING_MAP_KEYS_MAX; i++) | |
801 | map->key_idx[i] = -1; | |
802 | out: | |
803 | return map; | |
804 | free: | |
805 | tracing_map_destroy(map); | |
806 | map = ERR_PTR(-ENOMEM); | |
807 | ||
808 | goto out; | |
809 | } | |
810 | ||
811 | /** | |
812 | * tracing_map_init - Allocate and clear a map's tracing_map_elts | |
813 | * @map: The tracing_map to initialize | |
814 | * | |
815 | * Allocates a clears a pool of tracing_map_elts equal to the | |
816 | * user-specified size of 2 ** map_bits (internally maintained as | |
817 | * 'max_elts' in struct tracing_map). Before using, the map fields | |
818 | * should be added to the map with tracing_map_add_sum_field() and | |
819 | * tracing_map_add_key_field(). tracing_map_init() should then be | |
820 | * called to allocate the array of tracing_map_elts, in order to avoid | |
821 | * allocating anything in the map insertion path. The user-specified | |
822 | * map size reflects the max number of elements requested by the user | |
823 | * - internally we double that in order to keep the table sparse and | |
824 | * keep collisions manageable. | |
825 | * | |
826 | * See tracing_map.h for a description of tracing_map_ops. | |
827 | * | |
828 | * Return: the tracing_map pointer if successful, ERR_PTR if not. | |
829 | */ | |
830 | int tracing_map_init(struct tracing_map *map) | |
831 | { | |
832 | int err; | |
833 | ||
834 | if (map->n_fields < 2) | |
835 | return -EINVAL; /* need at least 1 key and 1 val */ | |
836 | ||
837 | err = tracing_map_alloc_elts(map); | |
838 | if (err) | |
839 | return err; | |
840 | ||
841 | tracing_map_clear(map); | |
842 | ||
843 | return err; | |
844 | } | |
845 | ||
846 | static int cmp_entries_dup(const struct tracing_map_sort_entry **a, | |
847 | const struct tracing_map_sort_entry **b) | |
848 | { | |
849 | int ret = 0; | |
850 | ||
851 | if (memcmp((*a)->key, (*b)->key, (*a)->elt->map->key_size)) | |
852 | ret = 1; | |
853 | ||
854 | return ret; | |
855 | } | |
856 | ||
857 | static int cmp_entries_sum(const struct tracing_map_sort_entry **a, | |
858 | const struct tracing_map_sort_entry **b) | |
859 | { | |
860 | const struct tracing_map_elt *elt_a, *elt_b; | |
861 | struct tracing_map_sort_key *sort_key; | |
862 | struct tracing_map_field *field; | |
863 | tracing_map_cmp_fn_t cmp_fn; | |
864 | void *val_a, *val_b; | |
865 | int ret = 0; | |
866 | ||
867 | elt_a = (*a)->elt; | |
868 | elt_b = (*b)->elt; | |
869 | ||
870 | sort_key = &elt_a->map->sort_key; | |
871 | ||
872 | field = &elt_a->fields[sort_key->field_idx]; | |
873 | cmp_fn = field->cmp_fn; | |
874 | ||
875 | val_a = &elt_a->fields[sort_key->field_idx].sum; | |
876 | val_b = &elt_b->fields[sort_key->field_idx].sum; | |
877 | ||
878 | ret = cmp_fn(val_a, val_b); | |
879 | if (sort_key->descending) | |
880 | ret = -ret; | |
881 | ||
882 | return ret; | |
883 | } | |
884 | ||
885 | static int cmp_entries_key(const struct tracing_map_sort_entry **a, | |
886 | const struct tracing_map_sort_entry **b) | |
887 | { | |
888 | const struct tracing_map_elt *elt_a, *elt_b; | |
889 | struct tracing_map_sort_key *sort_key; | |
890 | struct tracing_map_field *field; | |
891 | tracing_map_cmp_fn_t cmp_fn; | |
892 | void *val_a, *val_b; | |
893 | int ret = 0; | |
894 | ||
895 | elt_a = (*a)->elt; | |
896 | elt_b = (*b)->elt; | |
897 | ||
898 | sort_key = &elt_a->map->sort_key; | |
899 | ||
900 | field = &elt_a->fields[sort_key->field_idx]; | |
901 | ||
902 | cmp_fn = field->cmp_fn; | |
903 | ||
904 | val_a = elt_a->key + field->offset; | |
905 | val_b = elt_b->key + field->offset; | |
906 | ||
907 | ret = cmp_fn(val_a, val_b); | |
908 | if (sort_key->descending) | |
909 | ret = -ret; | |
910 | ||
911 | return ret; | |
912 | } | |
913 | ||
914 | static void destroy_sort_entry(struct tracing_map_sort_entry *entry) | |
915 | { | |
916 | if (!entry) | |
917 | return; | |
918 | ||
919 | if (entry->elt_copied) | |
920 | tracing_map_elt_free(entry->elt); | |
921 | ||
922 | kfree(entry); | |
923 | } | |
924 | ||
925 | /** | |
926 | * tracing_map_destroy_sort_entries - Destroy an array of sort entries | |
927 | * @entries: The entries to destroy | |
928 | * @n_entries: The number of entries in the array | |
929 | * | |
930 | * Destroy the elements returned by a tracing_map_sort_entries() call. | |
931 | */ | |
932 | void tracing_map_destroy_sort_entries(struct tracing_map_sort_entry **entries, | |
933 | unsigned int n_entries) | |
934 | { | |
935 | unsigned int i; | |
936 | ||
937 | for (i = 0; i < n_entries; i++) | |
938 | destroy_sort_entry(entries[i]); | |
939 | ||
940 | vfree(entries); | |
941 | } | |
942 | ||
943 | static struct tracing_map_sort_entry * | |
944 | create_sort_entry(void *key, struct tracing_map_elt *elt) | |
945 | { | |
946 | struct tracing_map_sort_entry *sort_entry; | |
947 | ||
948 | sort_entry = kzalloc(sizeof(*sort_entry), GFP_KERNEL); | |
949 | if (!sort_entry) | |
950 | return NULL; | |
951 | ||
952 | sort_entry->key = key; | |
953 | sort_entry->elt = elt; | |
954 | ||
955 | return sort_entry; | |
956 | } | |
957 | ||
c193707d | 958 | static void detect_dups(struct tracing_map_sort_entry **sort_entries, |
08d43a5f TZ |
959 | int n_entries, unsigned int key_size) |
960 | { | |
961 | unsigned int dups = 0, total_dups = 0; | |
c193707d | 962 | int i; |
08d43a5f TZ |
963 | void *key; |
964 | ||
965 | if (n_entries < 2) | |
c193707d | 966 | return; |
08d43a5f TZ |
967 | |
968 | sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *), | |
969 | (int (*)(const void *, const void *))cmp_entries_dup, NULL); | |
970 | ||
971 | key = sort_entries[0]->key; | |
972 | for (i = 1; i < n_entries; i++) { | |
973 | if (!memcmp(sort_entries[i]->key, key, key_size)) { | |
974 | dups++; total_dups++; | |
08d43a5f TZ |
975 | continue; |
976 | } | |
977 | key = sort_entries[i]->key; | |
978 | dups = 0; | |
979 | } | |
980 | ||
c193707d VP |
981 | WARN_ONCE(total_dups > 0, |
982 | "Duplicates detected: %d\n", total_dups); | |
08d43a5f TZ |
983 | } |
984 | ||
985 | static bool is_key(struct tracing_map *map, unsigned int field_idx) | |
986 | { | |
987 | unsigned int i; | |
988 | ||
989 | for (i = 0; i < map->n_keys; i++) | |
990 | if (map->key_idx[i] == field_idx) | |
991 | return true; | |
992 | return false; | |
993 | } | |
994 | ||
995 | static void sort_secondary(struct tracing_map *map, | |
996 | const struct tracing_map_sort_entry **entries, | |
997 | unsigned int n_entries, | |
998 | struct tracing_map_sort_key *primary_key, | |
999 | struct tracing_map_sort_key *secondary_key) | |
1000 | { | |
1001 | int (*primary_fn)(const struct tracing_map_sort_entry **, | |
1002 | const struct tracing_map_sort_entry **); | |
1003 | int (*secondary_fn)(const struct tracing_map_sort_entry **, | |
1004 | const struct tracing_map_sort_entry **); | |
1005 | unsigned i, start = 0, n_sub = 1; | |
1006 | ||
1007 | if (is_key(map, primary_key->field_idx)) | |
1008 | primary_fn = cmp_entries_key; | |
1009 | else | |
1010 | primary_fn = cmp_entries_sum; | |
1011 | ||
1012 | if (is_key(map, secondary_key->field_idx)) | |
1013 | secondary_fn = cmp_entries_key; | |
1014 | else | |
1015 | secondary_fn = cmp_entries_sum; | |
1016 | ||
1017 | for (i = 0; i < n_entries - 1; i++) { | |
1018 | const struct tracing_map_sort_entry **a = &entries[i]; | |
1019 | const struct tracing_map_sort_entry **b = &entries[i + 1]; | |
1020 | ||
1021 | if (primary_fn(a, b) == 0) { | |
1022 | n_sub++; | |
1023 | if (i < n_entries - 2) | |
1024 | continue; | |
1025 | } | |
1026 | ||
1027 | if (n_sub < 2) { | |
1028 | start = i + 1; | |
1029 | n_sub = 1; | |
1030 | continue; | |
1031 | } | |
1032 | ||
1033 | set_sort_key(map, secondary_key); | |
1034 | sort(&entries[start], n_sub, | |
1035 | sizeof(struct tracing_map_sort_entry *), | |
1036 | (int (*)(const void *, const void *))secondary_fn, NULL); | |
1037 | set_sort_key(map, primary_key); | |
1038 | ||
1039 | start = i + 1; | |
1040 | n_sub = 1; | |
1041 | } | |
1042 | } | |
1043 | ||
1044 | /** | |
1045 | * tracing_map_sort_entries - Sort the current set of tracing_map_elts in a map | |
1046 | * @map: The tracing_map | |
1047 | * @sort_key: The sort key to use for sorting | |
1048 | * @sort_entries: outval: pointer to allocated and sorted array of entries | |
1049 | * | |
1050 | * tracing_map_sort_entries() sorts the current set of entries in the | |
1051 | * map and returns the list of tracing_map_sort_entries containing | |
1052 | * them to the client in the sort_entries param. The client can | |
1053 | * access the struct tracing_map_elt element of interest directly as | |
1054 | * the 'elt' field of a returned struct tracing_map_sort_entry object. | |
1055 | * | |
1056 | * The sort_key has only two fields: idx and descending. 'idx' refers | |
1057 | * to the index of the field added via tracing_map_add_sum_field() or | |
1058 | * tracing_map_add_key_field() when the tracing_map was initialized. | |
1059 | * 'descending' is a flag that if set reverses the sort order, which | |
1060 | * by default is ascending. | |
1061 | * | |
1062 | * The client should not hold on to the returned array but should use | |
1063 | * it and call tracing_map_destroy_sort_entries() when done. | |
1064 | * | |
1065 | * Return: the number of sort_entries in the struct tracing_map_sort_entry | |
1066 | * array, negative on error | |
1067 | */ | |
1068 | int tracing_map_sort_entries(struct tracing_map *map, | |
1069 | struct tracing_map_sort_key *sort_keys, | |
1070 | unsigned int n_sort_keys, | |
1071 | struct tracing_map_sort_entry ***sort_entries) | |
1072 | { | |
1073 | int (*cmp_entries_fn)(const struct tracing_map_sort_entry **, | |
1074 | const struct tracing_map_sort_entry **); | |
1075 | struct tracing_map_sort_entry *sort_entry, **entries; | |
1076 | int i, n_entries, ret; | |
1077 | ||
1078 | entries = vmalloc(map->max_elts * sizeof(sort_entry)); | |
1079 | if (!entries) | |
1080 | return -ENOMEM; | |
1081 | ||
1082 | for (i = 0, n_entries = 0; i < map->map_size; i++) { | |
1083 | struct tracing_map_entry *entry; | |
1084 | ||
1085 | entry = TRACING_MAP_ENTRY(map->map, i); | |
1086 | ||
1087 | if (!entry->key || !entry->val) | |
1088 | continue; | |
1089 | ||
1090 | entries[n_entries] = create_sort_entry(entry->val->key, | |
1091 | entry->val); | |
1092 | if (!entries[n_entries++]) { | |
1093 | ret = -ENOMEM; | |
1094 | goto free; | |
1095 | } | |
1096 | } | |
1097 | ||
1098 | if (n_entries == 0) { | |
1099 | ret = 0; | |
1100 | goto free; | |
1101 | } | |
1102 | ||
1103 | if (n_entries == 1) { | |
1104 | *sort_entries = entries; | |
1105 | return 1; | |
1106 | } | |
1107 | ||
c193707d | 1108 | detect_dups(entries, n_entries, map->key_size); |
08d43a5f TZ |
1109 | |
1110 | if (is_key(map, sort_keys[0].field_idx)) | |
1111 | cmp_entries_fn = cmp_entries_key; | |
1112 | else | |
1113 | cmp_entries_fn = cmp_entries_sum; | |
1114 | ||
1115 | set_sort_key(map, &sort_keys[0]); | |
1116 | ||
1117 | sort(entries, n_entries, sizeof(struct tracing_map_sort_entry *), | |
1118 | (int (*)(const void *, const void *))cmp_entries_fn, NULL); | |
1119 | ||
1120 | if (n_sort_keys > 1) | |
1121 | sort_secondary(map, | |
1122 | (const struct tracing_map_sort_entry **)entries, | |
1123 | n_entries, | |
1124 | &sort_keys[0], | |
1125 | &sort_keys[1]); | |
1126 | ||
1127 | *sort_entries = entries; | |
1128 | ||
1129 | return n_entries; | |
1130 | free: | |
1131 | tracing_map_destroy_sort_entries(entries, n_entries); | |
1132 | ||
1133 | return ret; | |
1134 | } |