Commit | Line | Data |
---|---|---|
bcea3f96 | 1 | // SPDX-License-Identifier: GPL-2.0 |
08d43a5f TZ |
2 | /* |
3 | * tracing_map - lock-free map for tracing | |
4 | * | |
08d43a5f TZ |
5 | * Copyright (C) 2015 Tom Zanussi <tom.zanussi@linux.intel.com> |
6 | * | |
7 | * tracing_map implementation inspired by lock-free map algorithms | |
8 | * originated by Dr. Cliff Click: | |
9 | * | |
10 | * http://www.azulsystems.com/blog/cliff/2007-03-26-non-blocking-hashtable | |
11 | * http://www.azulsystems.com/events/javaone_2007/2007_LockFreeHash.pdf | |
12 | */ | |
13 | ||
14 | #include <linux/vmalloc.h> | |
15 | #include <linux/jhash.h> | |
16 | #include <linux/slab.h> | |
17 | #include <linux/sort.h> | |
f25667e5 | 18 | #include <linux/kmemleak.h> |
08d43a5f TZ |
19 | |
20 | #include "tracing_map.h" | |
21 | #include "trace.h" | |
22 | ||
23 | /* | |
24 | * NOTE: For a detailed description of the data structures used by | |
25 | * these functions (such as tracing_map_elt) please see the overview | |
26 | * of tracing_map data structures at the beginning of tracing_map.h. | |
27 | */ | |
28 | ||
29 | /** | |
30 | * tracing_map_update_sum - Add a value to a tracing_map_elt's sum field | |
31 | * @elt: The tracing_map_elt | |
32 | * @i: The index of the given sum associated with the tracing_map_elt | |
33 | * @n: The value to add to the sum | |
34 | * | |
35 | * Add n to sum i associated with the specified tracing_map_elt | |
36 | * instance. The index i is the index returned by the call to | |
37 | * tracing_map_add_sum_field() when the tracing map was set up. | |
38 | */ | |
39 | void tracing_map_update_sum(struct tracing_map_elt *elt, unsigned int i, u64 n) | |
40 | { | |
41 | atomic64_add(n, &elt->fields[i].sum); | |
42 | } | |
43 | ||
44 | /** | |
45 | * tracing_map_read_sum - Return the value of a tracing_map_elt's sum field | |
46 | * @elt: The tracing_map_elt | |
47 | * @i: The index of the given sum associated with the tracing_map_elt | |
48 | * | |
49 | * Retrieve the value of the sum i associated with the specified | |
50 | * tracing_map_elt instance. The index i is the index returned by the | |
51 | * call to tracing_map_add_sum_field() when the tracing map was set | |
52 | * up. | |
53 | * | |
54 | * Return: The sum associated with field i for elt. | |
55 | */ | |
56 | u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i) | |
57 | { | |
58 | return (u64)atomic64_read(&elt->fields[i].sum); | |
59 | } | |
60 | ||
2734b629 TZ |
61 | /** |
62 | * tracing_map_set_var - Assign a tracing_map_elt's variable field | |
63 | * @elt: The tracing_map_elt | |
64 | * @i: The index of the given variable associated with the tracing_map_elt | |
65 | * @n: The value to assign | |
66 | * | |
67 | * Assign n to variable i associated with the specified tracing_map_elt | |
68 | * instance. The index i is the index returned by the call to | |
69 | * tracing_map_add_var() when the tracing map was set up. | |
70 | */ | |
71 | void tracing_map_set_var(struct tracing_map_elt *elt, unsigned int i, u64 n) | |
72 | { | |
73 | atomic64_set(&elt->vars[i], n); | |
74 | elt->var_set[i] = true; | |
75 | } | |
76 | ||
77 | /** | |
78 | * tracing_map_var_set - Return whether or not a variable has been set | |
79 | * @elt: The tracing_map_elt | |
80 | * @i: The index of the given variable associated with the tracing_map_elt | |
81 | * | |
82 | * Return true if the variable has been set, false otherwise. The | |
83 | * index i is the index returned by the call to tracing_map_add_var() | |
84 | * when the tracing map was set up. | |
85 | */ | |
86 | bool tracing_map_var_set(struct tracing_map_elt *elt, unsigned int i) | |
87 | { | |
88 | return elt->var_set[i]; | |
89 | } | |
90 | ||
91 | /** | |
92 | * tracing_map_read_var - Return the value of a tracing_map_elt's variable field | |
93 | * @elt: The tracing_map_elt | |
94 | * @i: The index of the given variable associated with the tracing_map_elt | |
95 | * | |
96 | * Retrieve the value of the variable i associated with the specified | |
97 | * tracing_map_elt instance. The index i is the index returned by the | |
98 | * call to tracing_map_add_var() when the tracing map was set | |
99 | * up. | |
100 | * | |
101 | * Return: The variable value associated with field i for elt. | |
102 | */ | |
103 | u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i) | |
104 | { | |
105 | return (u64)atomic64_read(&elt->vars[i]); | |
106 | } | |
107 | ||
108 | /** | |
109 | * tracing_map_read_var_once - Return and reset a tracing_map_elt's variable field | |
110 | * @elt: The tracing_map_elt | |
111 | * @i: The index of the given variable associated with the tracing_map_elt | |
112 | * | |
113 | * Retrieve the value of the variable i associated with the specified | |
114 | * tracing_map_elt instance, and reset the variable to the 'not set' | |
115 | * state. The index i is the index returned by the call to | |
116 | * tracing_map_add_var() when the tracing map was set up. The reset | |
117 | * essentially makes the variable a read-once variable if it's only | |
118 | * accessed using this function. | |
119 | * | |
120 | * Return: The variable value associated with field i for elt. | |
121 | */ | |
122 | u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i) | |
123 | { | |
124 | elt->var_set[i] = false; | |
125 | return (u64)atomic64_read(&elt->vars[i]); | |
126 | } | |
127 | ||
08d43a5f TZ |
128 | int tracing_map_cmp_string(void *val_a, void *val_b) |
129 | { | |
130 | char *a = val_a; | |
131 | char *b = val_b; | |
132 | ||
133 | return strcmp(a, b); | |
134 | } | |
135 | ||
136 | int tracing_map_cmp_none(void *val_a, void *val_b) | |
137 | { | |
138 | return 0; | |
139 | } | |
140 | ||
141 | static int tracing_map_cmp_atomic64(void *val_a, void *val_b) | |
142 | { | |
143 | u64 a = atomic64_read((atomic64_t *)val_a); | |
144 | u64 b = atomic64_read((atomic64_t *)val_b); | |
145 | ||
146 | return (a > b) ? 1 : ((a < b) ? -1 : 0); | |
147 | } | |
148 | ||
149 | #define DEFINE_TRACING_MAP_CMP_FN(type) \ | |
150 | static int tracing_map_cmp_##type(void *val_a, void *val_b) \ | |
151 | { \ | |
106f41f5 SRV |
152 | type a = (type)(*(u64 *)val_a); \ |
153 | type b = (type)(*(u64 *)val_b); \ | |
08d43a5f TZ |
154 | \ |
155 | return (a > b) ? 1 : ((a < b) ? -1 : 0); \ | |
156 | } | |
157 | ||
158 | DEFINE_TRACING_MAP_CMP_FN(s64); | |
159 | DEFINE_TRACING_MAP_CMP_FN(u64); | |
160 | DEFINE_TRACING_MAP_CMP_FN(s32); | |
161 | DEFINE_TRACING_MAP_CMP_FN(u32); | |
162 | DEFINE_TRACING_MAP_CMP_FN(s16); | |
163 | DEFINE_TRACING_MAP_CMP_FN(u16); | |
164 | DEFINE_TRACING_MAP_CMP_FN(s8); | |
165 | DEFINE_TRACING_MAP_CMP_FN(u8); | |
166 | ||
167 | tracing_map_cmp_fn_t tracing_map_cmp_num(int field_size, | |
168 | int field_is_signed) | |
169 | { | |
170 | tracing_map_cmp_fn_t fn = tracing_map_cmp_none; | |
171 | ||
172 | switch (field_size) { | |
173 | case 8: | |
174 | if (field_is_signed) | |
175 | fn = tracing_map_cmp_s64; | |
176 | else | |
177 | fn = tracing_map_cmp_u64; | |
178 | break; | |
179 | case 4: | |
180 | if (field_is_signed) | |
181 | fn = tracing_map_cmp_s32; | |
182 | else | |
183 | fn = tracing_map_cmp_u32; | |
184 | break; | |
185 | case 2: | |
186 | if (field_is_signed) | |
187 | fn = tracing_map_cmp_s16; | |
188 | else | |
189 | fn = tracing_map_cmp_u16; | |
190 | break; | |
191 | case 1: | |
192 | if (field_is_signed) | |
193 | fn = tracing_map_cmp_s8; | |
194 | else | |
195 | fn = tracing_map_cmp_u8; | |
196 | break; | |
197 | } | |
198 | ||
199 | return fn; | |
200 | } | |
201 | ||
202 | static int tracing_map_add_field(struct tracing_map *map, | |
203 | tracing_map_cmp_fn_t cmp_fn) | |
204 | { | |
205 | int ret = -EINVAL; | |
206 | ||
207 | if (map->n_fields < TRACING_MAP_FIELDS_MAX) { | |
208 | ret = map->n_fields; | |
209 | map->fields[map->n_fields++].cmp_fn = cmp_fn; | |
210 | } | |
211 | ||
212 | return ret; | |
213 | } | |
214 | ||
215 | /** | |
216 | * tracing_map_add_sum_field - Add a field describing a tracing_map sum | |
217 | * @map: The tracing_map | |
218 | * | |
219 | * Add a sum field to the key and return the index identifying it in | |
220 | * the map and associated tracing_map_elts. This is the index used | |
221 | * for instance to update a sum for a particular tracing_map_elt using | |
222 | * tracing_map_update_sum() or reading it via tracing_map_read_sum(). | |
223 | * | |
224 | * Return: The index identifying the field in the map and associated | |
3b772b96 | 225 | * tracing_map_elts, or -EINVAL on error. |
08d43a5f TZ |
226 | */ |
227 | int tracing_map_add_sum_field(struct tracing_map *map) | |
228 | { | |
229 | return tracing_map_add_field(map, tracing_map_cmp_atomic64); | |
230 | } | |
231 | ||
2734b629 TZ |
232 | /** |
233 | * tracing_map_add_var - Add a field describing a tracing_map var | |
234 | * @map: The tracing_map | |
235 | * | |
236 | * Add a var to the map and return the index identifying it in the map | |
237 | * and associated tracing_map_elts. This is the index used for | |
238 | * instance to update a var for a particular tracing_map_elt using | |
239 | * tracing_map_update_var() or reading it via tracing_map_read_var(). | |
240 | * | |
241 | * Return: The index identifying the var in the map and associated | |
242 | * tracing_map_elts, or -EINVAL on error. | |
243 | */ | |
244 | int tracing_map_add_var(struct tracing_map *map) | |
245 | { | |
246 | int ret = -EINVAL; | |
247 | ||
248 | if (map->n_vars < TRACING_MAP_VARS_MAX) | |
249 | ret = map->n_vars++; | |
250 | ||
251 | return ret; | |
252 | } | |
253 | ||
08d43a5f TZ |
254 | /** |
255 | * tracing_map_add_key_field - Add a field describing a tracing_map key | |
256 | * @map: The tracing_map | |
257 | * @offset: The offset within the key | |
258 | * @cmp_fn: The comparison function that will be used to sort on the key | |
259 | * | |
260 | * Let the map know there is a key and that if it's used as a sort key | |
261 | * to use cmp_fn. | |
262 | * | |
263 | * A key can be a subset of a compound key; for that purpose, the | |
5c8c206e | 264 | * offset param is used to describe where within the compound key |
08d43a5f TZ |
265 | * the key referenced by this key field resides. |
266 | * | |
267 | * Return: The index identifying the field in the map and associated | |
3b772b96 | 268 | * tracing_map_elts, or -EINVAL on error. |
08d43a5f TZ |
269 | */ |
270 | int tracing_map_add_key_field(struct tracing_map *map, | |
271 | unsigned int offset, | |
272 | tracing_map_cmp_fn_t cmp_fn) | |
273 | ||
274 | { | |
275 | int idx = tracing_map_add_field(map, cmp_fn); | |
276 | ||
277 | if (idx < 0) | |
278 | return idx; | |
279 | ||
280 | map->fields[idx].offset = offset; | |
281 | ||
282 | map->key_idx[map->n_keys++] = idx; | |
283 | ||
284 | return idx; | |
285 | } | |
286 | ||
d013496f | 287 | static void tracing_map_array_clear(struct tracing_map_array *a) |
08d43a5f TZ |
288 | { |
289 | unsigned int i; | |
290 | ||
291 | if (!a->pages) | |
292 | return; | |
293 | ||
294 | for (i = 0; i < a->n_pages; i++) | |
295 | memset(a->pages[i], 0, PAGE_SIZE); | |
296 | } | |
297 | ||
d013496f | 298 | static void tracing_map_array_free(struct tracing_map_array *a) |
08d43a5f TZ |
299 | { |
300 | unsigned int i; | |
301 | ||
302 | if (!a) | |
303 | return; | |
304 | ||
475bb3c6 CH |
305 | if (!a->pages) |
306 | goto free; | |
08d43a5f TZ |
307 | |
308 | for (i = 0; i < a->n_pages; i++) { | |
309 | if (!a->pages[i]) | |
310 | break; | |
f25667e5 | 311 | kmemleak_free(a->pages[i]); |
08d43a5f TZ |
312 | free_page((unsigned long)a->pages[i]); |
313 | } | |
475bb3c6 CH |
314 | |
315 | kfree(a->pages); | |
316 | ||
317 | free: | |
318 | kfree(a); | |
08d43a5f TZ |
319 | } |
320 | ||
d013496f | 321 | static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, |
08d43a5f TZ |
322 | unsigned int entry_size) |
323 | { | |
324 | struct tracing_map_array *a; | |
325 | unsigned int i; | |
326 | ||
327 | a = kzalloc(sizeof(*a), GFP_KERNEL); | |
328 | if (!a) | |
329 | return NULL; | |
330 | ||
331 | a->entry_size_shift = fls(roundup_pow_of_two(entry_size) - 1); | |
332 | a->entries_per_page = PAGE_SIZE / (1 << a->entry_size_shift); | |
333 | a->n_pages = n_elts / a->entries_per_page; | |
334 | if (!a->n_pages) | |
335 | a->n_pages = 1; | |
336 | a->entry_shift = fls(a->entries_per_page) - 1; | |
337 | a->entry_mask = (1 << a->entry_shift) - 1; | |
338 | ||
339 | a->pages = kcalloc(a->n_pages, sizeof(void *), GFP_KERNEL); | |
340 | if (!a->pages) | |
341 | goto free; | |
342 | ||
343 | for (i = 0; i < a->n_pages; i++) { | |
344 | a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL); | |
345 | if (!a->pages[i]) | |
346 | goto free; | |
f25667e5 | 347 | kmemleak_alloc(a->pages[i], PAGE_SIZE, 1, GFP_KERNEL); |
08d43a5f TZ |
348 | } |
349 | out: | |
350 | return a; | |
351 | free: | |
352 | tracing_map_array_free(a); | |
353 | a = NULL; | |
354 | ||
355 | goto out; | |
356 | } | |
357 | ||
358 | static void tracing_map_elt_clear(struct tracing_map_elt *elt) | |
359 | { | |
360 | unsigned i; | |
361 | ||
362 | for (i = 0; i < elt->map->n_fields; i++) | |
363 | if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) | |
364 | atomic64_set(&elt->fields[i].sum, 0); | |
365 | ||
2734b629 TZ |
366 | for (i = 0; i < elt->map->n_vars; i++) { |
367 | atomic64_set(&elt->vars[i], 0); | |
368 | elt->var_set[i] = false; | |
369 | } | |
370 | ||
08d43a5f TZ |
371 | if (elt->map->ops && elt->map->ops->elt_clear) |
372 | elt->map->ops->elt_clear(elt); | |
373 | } | |
374 | ||
375 | static void tracing_map_elt_init_fields(struct tracing_map_elt *elt) | |
376 | { | |
377 | unsigned int i; | |
378 | ||
379 | tracing_map_elt_clear(elt); | |
380 | ||
381 | for (i = 0; i < elt->map->n_fields; i++) { | |
382 | elt->fields[i].cmp_fn = elt->map->fields[i].cmp_fn; | |
383 | ||
384 | if (elt->fields[i].cmp_fn != tracing_map_cmp_atomic64) | |
385 | elt->fields[i].offset = elt->map->fields[i].offset; | |
386 | } | |
387 | } | |
388 | ||
389 | static void tracing_map_elt_free(struct tracing_map_elt *elt) | |
390 | { | |
391 | if (!elt) | |
392 | return; | |
393 | ||
394 | if (elt->map->ops && elt->map->ops->elt_free) | |
395 | elt->map->ops->elt_free(elt); | |
396 | kfree(elt->fields); | |
2734b629 TZ |
397 | kfree(elt->vars); |
398 | kfree(elt->var_set); | |
08d43a5f TZ |
399 | kfree(elt->key); |
400 | kfree(elt); | |
401 | } | |
402 | ||
403 | static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) | |
404 | { | |
405 | struct tracing_map_elt *elt; | |
406 | int err = 0; | |
407 | ||
408 | elt = kzalloc(sizeof(*elt), GFP_KERNEL); | |
409 | if (!elt) | |
410 | return ERR_PTR(-ENOMEM); | |
411 | ||
412 | elt->map = map; | |
413 | ||
414 | elt->key = kzalloc(map->key_size, GFP_KERNEL); | |
415 | if (!elt->key) { | |
416 | err = -ENOMEM; | |
417 | goto free; | |
418 | } | |
419 | ||
420 | elt->fields = kcalloc(map->n_fields, sizeof(*elt->fields), GFP_KERNEL); | |
421 | if (!elt->fields) { | |
422 | err = -ENOMEM; | |
423 | goto free; | |
424 | } | |
425 | ||
2734b629 TZ |
426 | elt->vars = kcalloc(map->n_vars, sizeof(*elt->vars), GFP_KERNEL); |
427 | if (!elt->vars) { | |
428 | err = -ENOMEM; | |
429 | goto free; | |
430 | } | |
431 | ||
432 | elt->var_set = kcalloc(map->n_vars, sizeof(*elt->var_set), GFP_KERNEL); | |
433 | if (!elt->var_set) { | |
434 | err = -ENOMEM; | |
435 | goto free; | |
436 | } | |
437 | ||
08d43a5f TZ |
438 | tracing_map_elt_init_fields(elt); |
439 | ||
440 | if (map->ops && map->ops->elt_alloc) { | |
441 | err = map->ops->elt_alloc(elt); | |
442 | if (err) | |
443 | goto free; | |
444 | } | |
445 | return elt; | |
446 | free: | |
447 | tracing_map_elt_free(elt); | |
448 | ||
449 | return ERR_PTR(err); | |
450 | } | |
451 | ||
452 | static struct tracing_map_elt *get_free_elt(struct tracing_map *map) | |
453 | { | |
454 | struct tracing_map_elt *elt = NULL; | |
455 | int idx; | |
456 | ||
bcf86c01 | 457 | idx = atomic_fetch_add_unless(&map->next_elt, 1, map->max_elts); |
08d43a5f TZ |
458 | if (idx < map->max_elts) { |
459 | elt = *(TRACING_MAP_ELT(map->elts, idx)); | |
460 | if (map->ops && map->ops->elt_init) | |
461 | map->ops->elt_init(elt); | |
462 | } | |
463 | ||
464 | return elt; | |
465 | } | |
466 | ||
467 | static void tracing_map_free_elts(struct tracing_map *map) | |
468 | { | |
469 | unsigned int i; | |
470 | ||
471 | if (!map->elts) | |
472 | return; | |
473 | ||
6e4cf657 | 474 | for (i = 0; i < map->max_elts; i++) { |
08d43a5f | 475 | tracing_map_elt_free(*(TRACING_MAP_ELT(map->elts, i))); |
6e4cf657 TZ |
476 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; |
477 | } | |
08d43a5f TZ |
478 | |
479 | tracing_map_array_free(map->elts); | |
6e4cf657 | 480 | map->elts = NULL; |
08d43a5f TZ |
481 | } |
482 | ||
483 | static int tracing_map_alloc_elts(struct tracing_map *map) | |
484 | { | |
485 | unsigned int i; | |
486 | ||
487 | map->elts = tracing_map_array_alloc(map->max_elts, | |
488 | sizeof(struct tracing_map_elt *)); | |
489 | if (!map->elts) | |
490 | return -ENOMEM; | |
491 | ||
492 | for (i = 0; i < map->max_elts; i++) { | |
493 | *(TRACING_MAP_ELT(map->elts, i)) = tracing_map_elt_alloc(map); | |
6e4cf657 TZ |
494 | if (IS_ERR(*(TRACING_MAP_ELT(map->elts, i)))) { |
495 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; | |
08d43a5f TZ |
496 | tracing_map_free_elts(map); |
497 | ||
498 | return -ENOMEM; | |
499 | } | |
500 | } | |
501 | ||
502 | return 0; | |
503 | } | |
504 | ||
505 | static inline bool keys_match(void *key, void *test_key, unsigned key_size) | |
506 | { | |
507 | bool match = true; | |
508 | ||
509 | if (memcmp(key, test_key, key_size)) | |
510 | match = false; | |
511 | ||
512 | return match; | |
513 | } | |
514 | ||
515 | static inline struct tracing_map_elt * | |
516 | __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) | |
517 | { | |
518 | u32 idx, key_hash, test_key; | |
cbf4100e | 519 | int dup_try = 0; |
08d43a5f | 520 | struct tracing_map_entry *entry; |
cbf4100e | 521 | struct tracing_map_elt *val; |
08d43a5f TZ |
522 | |
523 | key_hash = jhash(key, map->key_size, 0); | |
524 | if (key_hash == 0) | |
525 | key_hash = 1; | |
526 | idx = key_hash >> (32 - (map->map_bits + 1)); | |
527 | ||
528 | while (1) { | |
529 | idx &= (map->map_size - 1); | |
530 | entry = TRACING_MAP_ENTRY(map->map, idx); | |
531 | test_key = entry->key; | |
532 | ||
cbf4100e VP |
533 | if (test_key && test_key == key_hash) { |
534 | val = READ_ONCE(entry->val); | |
535 | if (val && | |
536 | keys_match(key, val->key, map->key_size)) { | |
537 | if (!lookup_only) | |
538 | atomic64_inc(&map->hits); | |
539 | return val; | |
540 | } else if (unlikely(!val)) { | |
541 | /* | |
542 | * The key is present. But, val (pointer to elt | |
543 | * struct) is still NULL. which means some other | |
544 | * thread is in the process of inserting an | |
545 | * element. | |
546 | * | |
547 | * On top of that, it's key_hash is same as the | |
548 | * one being inserted right now. So, it's | |
549 | * possible that the element has the same | |
550 | * key as well. | |
551 | */ | |
552 | ||
553 | dup_try++; | |
554 | if (dup_try > map->map_size) { | |
555 | atomic64_inc(&map->drops); | |
556 | break; | |
557 | } | |
558 | continue; | |
559 | } | |
08d43a5f TZ |
560 | } |
561 | ||
562 | if (!test_key) { | |
563 | if (lookup_only) | |
564 | break; | |
565 | ||
566 | if (!cmpxchg(&entry->key, 0, key_hash)) { | |
567 | struct tracing_map_elt *elt; | |
568 | ||
569 | elt = get_free_elt(map); | |
570 | if (!elt) { | |
571 | atomic64_inc(&map->drops); | |
572 | entry->key = 0; | |
573 | break; | |
574 | } | |
575 | ||
576 | memcpy(elt->key, key, map->key_size); | |
2b447606 PP |
577 | /* |
578 | * Ensure the initialization is visible and | |
579 | * publish the elt. | |
580 | */ | |
581 | smp_wmb(); | |
582 | WRITE_ONCE(entry->val, elt); | |
08d43a5f TZ |
583 | atomic64_inc(&map->hits); |
584 | ||
585 | return entry->val; | |
cbf4100e VP |
586 | } else { |
587 | /* | |
588 | * cmpxchg() failed. Loop around once | |
589 | * more to check what key was inserted. | |
590 | */ | |
591 | dup_try++; | |
592 | continue; | |
08d43a5f TZ |
593 | } |
594 | } | |
595 | ||
596 | idx++; | |
597 | } | |
598 | ||
599 | return NULL; | |
600 | } | |
601 | ||
602 | /** | |
603 | * tracing_map_insert - Insert key and/or retrieve val from a tracing_map | |
604 | * @map: The tracing_map to insert into | |
605 | * @key: The key to insert | |
606 | * | |
607 | * Inserts a key into a tracing_map and creates and returns a new | |
608 | * tracing_map_elt for it, or if the key has already been inserted by | |
609 | * a previous call, returns the tracing_map_elt already associated | |
610 | * with it. When the map was created, the number of elements to be | |
611 | * allocated for the map was specified (internally maintained as | |
612 | * 'max_elts' in struct tracing_map), and that number of | |
613 | * tracing_map_elts was created by tracing_map_init(). This is the | |
614 | * pre-allocated pool of tracing_map_elts that tracing_map_insert() | |
615 | * will allocate from when adding new keys. Once that pool is | |
616 | * exhausted, tracing_map_insert() is useless and will return NULL to | |
617 | * signal that state. There are two user-visible tracing_map | |
618 | * variables, 'hits' and 'drops', which are updated by this function. | |
619 | * Every time an element is either successfully inserted or retrieved, | |
2b5894cc | 620 | * the 'hits' value is incremented. Every time an element insertion |
08d43a5f TZ |
621 | * fails, the 'drops' value is incremented. |
622 | * | |
623 | * This is a lock-free tracing map insertion function implementing a | |
624 | * modified form of Cliff Click's basic insertion algorithm. It | |
625 | * requires the table size be a power of two. To prevent any | |
626 | * possibility of an infinite loop we always make the internal table | |
627 | * size double the size of the requested table size (max_elts * 2). | |
628 | * Likewise, we never reuse a slot or resize or delete elements - when | |
629 | * we've reached max_elts entries, we simply return NULL once we've | |
630 | * run out of entries. Readers can at any point in time traverse the | |
631 | * tracing map and safely access the key/val pairs. | |
632 | * | |
633 | * Return: the tracing_map_elt pointer val associated with the key. | |
634 | * If this was a newly inserted key, the val will be a newly allocated | |
635 | * and associated tracing_map_elt pointer val. If the key wasn't | |
636 | * found and the pool of tracing_map_elts has been exhausted, NULL is | |
637 | * returned and no further insertions will succeed. | |
638 | */ | |
639 | struct tracing_map_elt *tracing_map_insert(struct tracing_map *map, void *key) | |
640 | { | |
641 | return __tracing_map_insert(map, key, false); | |
642 | } | |
643 | ||
644 | /** | |
645 | * tracing_map_lookup - Retrieve val from a tracing_map | |
646 | * @map: The tracing_map to perform the lookup on | |
647 | * @key: The key to look up | |
648 | * | |
649 | * Looks up key in tracing_map and if found returns the matching | |
650 | * tracing_map_elt. This is a lock-free lookup; see | |
651 | * tracing_map_insert() for details on tracing_map and how it works. | |
652 | * Every time an element is retrieved, the 'hits' value is | |
2b5894cc | 653 | * incremented. There is one user-visible tracing_map variable, |
08d43a5f | 654 | * 'hits', which is updated by this function. Every time an element |
2b5894cc | 655 | * is successfully retrieved, the 'hits' value is incremented. The |
08d43a5f TZ |
656 | * 'drops' value is never updated by this function. |
657 | * | |
658 | * Return: the tracing_map_elt pointer val associated with the key. | |
659 | * If the key wasn't found, NULL is returned. | |
660 | */ | |
661 | struct tracing_map_elt *tracing_map_lookup(struct tracing_map *map, void *key) | |
662 | { | |
663 | return __tracing_map_insert(map, key, true); | |
664 | } | |
665 | ||
666 | /** | |
667 | * tracing_map_destroy - Destroy a tracing_map | |
668 | * @map: The tracing_map to destroy | |
669 | * | |
670 | * Frees a tracing_map along with its associated array of | |
671 | * tracing_map_elts. | |
672 | * | |
673 | * Callers should make sure there are no readers or writers actively | |
674 | * reading or inserting into the map before calling this. | |
675 | */ | |
676 | void tracing_map_destroy(struct tracing_map *map) | |
677 | { | |
678 | if (!map) | |
679 | return; | |
680 | ||
681 | tracing_map_free_elts(map); | |
682 | ||
683 | tracing_map_array_free(map->map); | |
684 | kfree(map); | |
685 | } | |
686 | ||
687 | /** | |
688 | * tracing_map_clear - Clear a tracing_map | |
689 | * @map: The tracing_map to clear | |
690 | * | |
691 | * Resets the tracing map to a cleared or initial state. The | |
692 | * tracing_map_elts are all cleared, and the array of struct | |
693 | * tracing_map_entry is reset to an initialized state. | |
694 | * | |
695 | * Callers should make sure there are no writers actively inserting | |
696 | * into the map before calling this. | |
697 | */ | |
698 | void tracing_map_clear(struct tracing_map *map) | |
699 | { | |
700 | unsigned int i; | |
701 | ||
bcf86c01 | 702 | atomic_set(&map->next_elt, 0); |
08d43a5f TZ |
703 | atomic64_set(&map->hits, 0); |
704 | atomic64_set(&map->drops, 0); | |
705 | ||
706 | tracing_map_array_clear(map->map); | |
707 | ||
708 | for (i = 0; i < map->max_elts; i++) | |
709 | tracing_map_elt_clear(*(TRACING_MAP_ELT(map->elts, i))); | |
710 | } | |
711 | ||
712 | static void set_sort_key(struct tracing_map *map, | |
713 | struct tracing_map_sort_key *sort_key) | |
714 | { | |
715 | map->sort_key = *sort_key; | |
716 | } | |
717 | ||
718 | /** | |
719 | * tracing_map_create - Create a lock-free map and element pool | |
720 | * @map_bits: The size of the map (2 ** map_bits) | |
721 | * @key_size: The size of the key for the map in bytes | |
722 | * @ops: Optional client-defined tracing_map_ops instance | |
723 | * @private_data: Client data associated with the map | |
724 | * | |
725 | * Creates and sets up a map to contain 2 ** map_bits number of | |
726 | * elements (internally maintained as 'max_elts' in struct | |
727 | * tracing_map). Before using, map fields should be added to the map | |
728 | * with tracing_map_add_sum_field() and tracing_map_add_key_field(). | |
729 | * tracing_map_init() should then be called to allocate the array of | |
730 | * tracing_map_elts, in order to avoid allocating anything in the map | |
731 | * insertion path. The user-specified map size reflects the maximum | |
732 | * number of elements that can be contained in the table requested by | |
733 | * the user - internally we double that in order to keep the table | |
734 | * sparse and keep collisions manageable. | |
735 | * | |
736 | * A tracing_map is a special-purpose map designed to aggregate or | |
737 | * 'sum' one or more values associated with a specific object of type | |
738 | * tracing_map_elt, which is attached by the map to a given key. | |
739 | * | |
740 | * tracing_map_create() sets up the map itself, and provides | |
741 | * operations for inserting tracing_map_elts, but doesn't allocate the | |
742 | * tracing_map_elts themselves, or provide a means for describing the | |
743 | * keys or sums associated with the tracing_map_elts. All | |
744 | * tracing_map_elts for a given map have the same set of sums and | |
745 | * keys, which are defined by the client using the functions | |
746 | * tracing_map_add_key_field() and tracing_map_add_sum_field(). Once | |
747 | * the fields are defined, the pool of elements allocated for the map | |
748 | * can be created, which occurs when the client code calls | |
749 | * tracing_map_init(). | |
750 | * | |
751 | * When tracing_map_init() returns, tracing_map_elt elements can be | |
752 | * inserted into the map using tracing_map_insert(). When called, | |
753 | * tracing_map_insert() grabs a free tracing_map_elt from the pool, or | |
754 | * finds an existing match in the map and in either case returns it. | |
755 | * The client can then use tracing_map_update_sum() and | |
756 | * tracing_map_read_sum() to update or read a given sum field for the | |
757 | * tracing_map_elt. | |
758 | * | |
759 | * The client can at any point retrieve and traverse the current set | |
760 | * of inserted tracing_map_elts in a tracing_map, via | |
761 | * tracing_map_sort_entries(). Sorting can be done on any field, | |
762 | * including keys. | |
763 | * | |
764 | * See tracing_map.h for a description of tracing_map_ops. | |
765 | * | |
766 | * Return: the tracing_map pointer if successful, ERR_PTR if not. | |
767 | */ | |
768 | struct tracing_map *tracing_map_create(unsigned int map_bits, | |
769 | unsigned int key_size, | |
770 | const struct tracing_map_ops *ops, | |
771 | void *private_data) | |
772 | { | |
773 | struct tracing_map *map; | |
774 | unsigned int i; | |
775 | ||
776 | if (map_bits < TRACING_MAP_BITS_MIN || | |
777 | map_bits > TRACING_MAP_BITS_MAX) | |
778 | return ERR_PTR(-EINVAL); | |
779 | ||
780 | map = kzalloc(sizeof(*map), GFP_KERNEL); | |
781 | if (!map) | |
782 | return ERR_PTR(-ENOMEM); | |
783 | ||
784 | map->map_bits = map_bits; | |
785 | map->max_elts = (1 << map_bits); | |
bcf86c01 | 786 | atomic_set(&map->next_elt, 0); |
08d43a5f TZ |
787 | |
788 | map->map_size = (1 << (map_bits + 1)); | |
789 | map->ops = ops; | |
790 | ||
791 | map->private_data = private_data; | |
792 | ||
793 | map->map = tracing_map_array_alloc(map->map_size, | |
794 | sizeof(struct tracing_map_entry)); | |
795 | if (!map->map) | |
796 | goto free; | |
797 | ||
798 | map->key_size = key_size; | |
799 | for (i = 0; i < TRACING_MAP_KEYS_MAX; i++) | |
800 | map->key_idx[i] = -1; | |
801 | out: | |
802 | return map; | |
803 | free: | |
804 | tracing_map_destroy(map); | |
805 | map = ERR_PTR(-ENOMEM); | |
806 | ||
807 | goto out; | |
808 | } | |
809 | ||
810 | /** | |
811 | * tracing_map_init - Allocate and clear a map's tracing_map_elts | |
812 | * @map: The tracing_map to initialize | |
813 | * | |
814 | * Allocates a clears a pool of tracing_map_elts equal to the | |
815 | * user-specified size of 2 ** map_bits (internally maintained as | |
816 | * 'max_elts' in struct tracing_map). Before using, the map fields | |
817 | * should be added to the map with tracing_map_add_sum_field() and | |
818 | * tracing_map_add_key_field(). tracing_map_init() should then be | |
819 | * called to allocate the array of tracing_map_elts, in order to avoid | |
820 | * allocating anything in the map insertion path. The user-specified | |
821 | * map size reflects the max number of elements requested by the user | |
822 | * - internally we double that in order to keep the table sparse and | |
823 | * keep collisions manageable. | |
824 | * | |
825 | * See tracing_map.h for a description of tracing_map_ops. | |
826 | * | |
827 | * Return: the tracing_map pointer if successful, ERR_PTR if not. | |
828 | */ | |
829 | int tracing_map_init(struct tracing_map *map) | |
830 | { | |
831 | int err; | |
832 | ||
833 | if (map->n_fields < 2) | |
834 | return -EINVAL; /* need at least 1 key and 1 val */ | |
835 | ||
836 | err = tracing_map_alloc_elts(map); | |
837 | if (err) | |
838 | return err; | |
839 | ||
840 | tracing_map_clear(map); | |
841 | ||
842 | return err; | |
843 | } | |
844 | ||
7ce1bb83 | 845 | static int cmp_entries_dup(const void *A, const void *B) |
08d43a5f | 846 | { |
7ce1bb83 | 847 | const struct tracing_map_sort_entry *a, *b; |
08d43a5f TZ |
848 | int ret = 0; |
849 | ||
7ce1bb83 KS |
850 | a = *(const struct tracing_map_sort_entry **)A; |
851 | b = *(const struct tracing_map_sort_entry **)B; | |
852 | ||
853 | if (memcmp(a->key, b->key, a->elt->map->key_size)) | |
08d43a5f TZ |
854 | ret = 1; |
855 | ||
856 | return ret; | |
857 | } | |
858 | ||
7ce1bb83 | 859 | static int cmp_entries_sum(const void *A, const void *B) |
08d43a5f TZ |
860 | { |
861 | const struct tracing_map_elt *elt_a, *elt_b; | |
7ce1bb83 | 862 | const struct tracing_map_sort_entry *a, *b; |
08d43a5f TZ |
863 | struct tracing_map_sort_key *sort_key; |
864 | struct tracing_map_field *field; | |
865 | tracing_map_cmp_fn_t cmp_fn; | |
866 | void *val_a, *val_b; | |
867 | int ret = 0; | |
868 | ||
7ce1bb83 KS |
869 | a = *(const struct tracing_map_sort_entry **)A; |
870 | b = *(const struct tracing_map_sort_entry **)B; | |
871 | ||
872 | elt_a = a->elt; | |
873 | elt_b = b->elt; | |
08d43a5f TZ |
874 | |
875 | sort_key = &elt_a->map->sort_key; | |
876 | ||
877 | field = &elt_a->fields[sort_key->field_idx]; | |
878 | cmp_fn = field->cmp_fn; | |
879 | ||
880 | val_a = &elt_a->fields[sort_key->field_idx].sum; | |
881 | val_b = &elt_b->fields[sort_key->field_idx].sum; | |
882 | ||
883 | ret = cmp_fn(val_a, val_b); | |
884 | if (sort_key->descending) | |
885 | ret = -ret; | |
886 | ||
887 | return ret; | |
888 | } | |
889 | ||
7ce1bb83 | 890 | static int cmp_entries_key(const void *A, const void *B) |
08d43a5f TZ |
891 | { |
892 | const struct tracing_map_elt *elt_a, *elt_b; | |
7ce1bb83 | 893 | const struct tracing_map_sort_entry *a, *b; |
08d43a5f TZ |
894 | struct tracing_map_sort_key *sort_key; |
895 | struct tracing_map_field *field; | |
896 | tracing_map_cmp_fn_t cmp_fn; | |
897 | void *val_a, *val_b; | |
898 | int ret = 0; | |
899 | ||
7ce1bb83 KS |
900 | a = *(const struct tracing_map_sort_entry **)A; |
901 | b = *(const struct tracing_map_sort_entry **)B; | |
902 | ||
903 | elt_a = a->elt; | |
904 | elt_b = b->elt; | |
08d43a5f TZ |
905 | |
906 | sort_key = &elt_a->map->sort_key; | |
907 | ||
908 | field = &elt_a->fields[sort_key->field_idx]; | |
909 | ||
910 | cmp_fn = field->cmp_fn; | |
911 | ||
912 | val_a = elt_a->key + field->offset; | |
913 | val_b = elt_b->key + field->offset; | |
914 | ||
915 | ret = cmp_fn(val_a, val_b); | |
916 | if (sort_key->descending) | |
917 | ret = -ret; | |
918 | ||
919 | return ret; | |
920 | } | |
921 | ||
922 | static void destroy_sort_entry(struct tracing_map_sort_entry *entry) | |
923 | { | |
924 | if (!entry) | |
925 | return; | |
926 | ||
927 | if (entry->elt_copied) | |
928 | tracing_map_elt_free(entry->elt); | |
929 | ||
930 | kfree(entry); | |
931 | } | |
932 | ||
933 | /** | |
934 | * tracing_map_destroy_sort_entries - Destroy an array of sort entries | |
935 | * @entries: The entries to destroy | |
936 | * @n_entries: The number of entries in the array | |
937 | * | |
938 | * Destroy the elements returned by a tracing_map_sort_entries() call. | |
939 | */ | |
940 | void tracing_map_destroy_sort_entries(struct tracing_map_sort_entry **entries, | |
941 | unsigned int n_entries) | |
942 | { | |
943 | unsigned int i; | |
944 | ||
945 | for (i = 0; i < n_entries; i++) | |
946 | destroy_sort_entry(entries[i]); | |
947 | ||
948 | vfree(entries); | |
949 | } | |
950 | ||
951 | static struct tracing_map_sort_entry * | |
952 | create_sort_entry(void *key, struct tracing_map_elt *elt) | |
953 | { | |
954 | struct tracing_map_sort_entry *sort_entry; | |
955 | ||
956 | sort_entry = kzalloc(sizeof(*sort_entry), GFP_KERNEL); | |
957 | if (!sort_entry) | |
958 | return NULL; | |
959 | ||
960 | sort_entry->key = key; | |
961 | sort_entry->elt = elt; | |
962 | ||
963 | return sort_entry; | |
964 | } | |
965 | ||
c193707d | 966 | static void detect_dups(struct tracing_map_sort_entry **sort_entries, |
08d43a5f TZ |
967 | int n_entries, unsigned int key_size) |
968 | { | |
ed87277f | 969 | unsigned int total_dups = 0; |
c193707d | 970 | int i; |
08d43a5f TZ |
971 | void *key; |
972 | ||
973 | if (n_entries < 2) | |
c193707d | 974 | return; |
08d43a5f TZ |
975 | |
976 | sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *), | |
977 | (int (*)(const void *, const void *))cmp_entries_dup, NULL); | |
978 | ||
979 | key = sort_entries[0]->key; | |
980 | for (i = 1; i < n_entries; i++) { | |
981 | if (!memcmp(sort_entries[i]->key, key, key_size)) { | |
ed87277f | 982 | total_dups++; |
08d43a5f TZ |
983 | continue; |
984 | } | |
985 | key = sort_entries[i]->key; | |
08d43a5f TZ |
986 | } |
987 | ||
c193707d VP |
988 | WARN_ONCE(total_dups > 0, |
989 | "Duplicates detected: %d\n", total_dups); | |
08d43a5f TZ |
990 | } |
991 | ||
992 | static bool is_key(struct tracing_map *map, unsigned int field_idx) | |
993 | { | |
994 | unsigned int i; | |
995 | ||
996 | for (i = 0; i < map->n_keys; i++) | |
997 | if (map->key_idx[i] == field_idx) | |
998 | return true; | |
999 | return false; | |
1000 | } | |
1001 | ||
1002 | static void sort_secondary(struct tracing_map *map, | |
1003 | const struct tracing_map_sort_entry **entries, | |
1004 | unsigned int n_entries, | |
1005 | struct tracing_map_sort_key *primary_key, | |
1006 | struct tracing_map_sort_key *secondary_key) | |
1007 | { | |
7ce1bb83 KS |
1008 | int (*primary_fn)(const void *, const void *); |
1009 | int (*secondary_fn)(const void *, const void *); | |
08d43a5f TZ |
1010 | unsigned i, start = 0, n_sub = 1; |
1011 | ||
1012 | if (is_key(map, primary_key->field_idx)) | |
1013 | primary_fn = cmp_entries_key; | |
1014 | else | |
1015 | primary_fn = cmp_entries_sum; | |
1016 | ||
1017 | if (is_key(map, secondary_key->field_idx)) | |
1018 | secondary_fn = cmp_entries_key; | |
1019 | else | |
1020 | secondary_fn = cmp_entries_sum; | |
1021 | ||
1022 | for (i = 0; i < n_entries - 1; i++) { | |
1023 | const struct tracing_map_sort_entry **a = &entries[i]; | |
1024 | const struct tracing_map_sort_entry **b = &entries[i + 1]; | |
1025 | ||
1026 | if (primary_fn(a, b) == 0) { | |
1027 | n_sub++; | |
1028 | if (i < n_entries - 2) | |
1029 | continue; | |
1030 | } | |
1031 | ||
1032 | if (n_sub < 2) { | |
1033 | start = i + 1; | |
1034 | n_sub = 1; | |
1035 | continue; | |
1036 | } | |
1037 | ||
1038 | set_sort_key(map, secondary_key); | |
1039 | sort(&entries[start], n_sub, | |
1040 | sizeof(struct tracing_map_sort_entry *), | |
1041 | (int (*)(const void *, const void *))secondary_fn, NULL); | |
1042 | set_sort_key(map, primary_key); | |
1043 | ||
1044 | start = i + 1; | |
1045 | n_sub = 1; | |
1046 | } | |
1047 | } | |
1048 | ||
1049 | /** | |
1050 | * tracing_map_sort_entries - Sort the current set of tracing_map_elts in a map | |
1051 | * @map: The tracing_map | |
adaa0a9f YL |
1052 | * @sort_keys: The sort key to use for sorting |
1053 | * @n_sort_keys: hitcount, always have at least one | |
08d43a5f TZ |
1054 | * @sort_entries: outval: pointer to allocated and sorted array of entries |
1055 | * | |
1056 | * tracing_map_sort_entries() sorts the current set of entries in the | |
1057 | * map and returns the list of tracing_map_sort_entries containing | |
1058 | * them to the client in the sort_entries param. The client can | |
1059 | * access the struct tracing_map_elt element of interest directly as | |
1060 | * the 'elt' field of a returned struct tracing_map_sort_entry object. | |
1061 | * | |
1062 | * The sort_key has only two fields: idx and descending. 'idx' refers | |
1063 | * to the index of the field added via tracing_map_add_sum_field() or | |
1064 | * tracing_map_add_key_field() when the tracing_map was initialized. | |
1065 | * 'descending' is a flag that if set reverses the sort order, which | |
1066 | * by default is ascending. | |
1067 | * | |
1068 | * The client should not hold on to the returned array but should use | |
1069 | * it and call tracing_map_destroy_sort_entries() when done. | |
1070 | * | |
1071 | * Return: the number of sort_entries in the struct tracing_map_sort_entry | |
1072 | * array, negative on error | |
1073 | */ | |
1074 | int tracing_map_sort_entries(struct tracing_map *map, | |
1075 | struct tracing_map_sort_key *sort_keys, | |
1076 | unsigned int n_sort_keys, | |
1077 | struct tracing_map_sort_entry ***sort_entries) | |
1078 | { | |
7ce1bb83 | 1079 | int (*cmp_entries_fn)(const void *, const void *); |
08d43a5f TZ |
1080 | struct tracing_map_sort_entry *sort_entry, **entries; |
1081 | int i, n_entries, ret; | |
1082 | ||
42bc47b3 | 1083 | entries = vmalloc(array_size(sizeof(sort_entry), map->max_elts)); |
08d43a5f TZ |
1084 | if (!entries) |
1085 | return -ENOMEM; | |
1086 | ||
1087 | for (i = 0, n_entries = 0; i < map->map_size; i++) { | |
1088 | struct tracing_map_entry *entry; | |
1089 | ||
1090 | entry = TRACING_MAP_ENTRY(map->map, i); | |
1091 | ||
1092 | if (!entry->key || !entry->val) | |
1093 | continue; | |
1094 | ||
1095 | entries[n_entries] = create_sort_entry(entry->val->key, | |
1096 | entry->val); | |
1097 | if (!entries[n_entries++]) { | |
1098 | ret = -ENOMEM; | |
1099 | goto free; | |
1100 | } | |
1101 | } | |
1102 | ||
1103 | if (n_entries == 0) { | |
1104 | ret = 0; | |
1105 | goto free; | |
1106 | } | |
1107 | ||
1108 | if (n_entries == 1) { | |
1109 | *sort_entries = entries; | |
1110 | return 1; | |
1111 | } | |
1112 | ||
c193707d | 1113 | detect_dups(entries, n_entries, map->key_size); |
08d43a5f TZ |
1114 | |
1115 | if (is_key(map, sort_keys[0].field_idx)) | |
1116 | cmp_entries_fn = cmp_entries_key; | |
1117 | else | |
1118 | cmp_entries_fn = cmp_entries_sum; | |
1119 | ||
1120 | set_sort_key(map, &sort_keys[0]); | |
1121 | ||
1122 | sort(entries, n_entries, sizeof(struct tracing_map_sort_entry *), | |
1123 | (int (*)(const void *, const void *))cmp_entries_fn, NULL); | |
1124 | ||
1125 | if (n_sort_keys > 1) | |
1126 | sort_secondary(map, | |
1127 | (const struct tracing_map_sort_entry **)entries, | |
1128 | n_entries, | |
1129 | &sort_keys[0], | |
1130 | &sort_keys[1]); | |
1131 | ||
1132 | *sort_entries = entries; | |
1133 | ||
1134 | return n_entries; | |
1135 | free: | |
1136 | tracing_map_destroy_sort_entries(entries, n_entries); | |
1137 | ||
1138 | return ret; | |
1139 | } |