Commit | Line | Data |
---|---|---|
08d43a5f TZ |
1 | /* |
2 | * tracing_map - lock-free map for tracing | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License as published by | |
6 | * the Free Software Foundation; either version 2 of the License, or | |
7 | * (at your option) any later version. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
13 | * | |
14 | * Copyright (C) 2015 Tom Zanussi <tom.zanussi@linux.intel.com> | |
15 | * | |
16 | * tracing_map implementation inspired by lock-free map algorithms | |
17 | * originated by Dr. Cliff Click: | |
18 | * | |
19 | * http://www.azulsystems.com/blog/cliff/2007-03-26-non-blocking-hashtable | |
20 | * http://www.azulsystems.com/events/javaone_2007/2007_LockFreeHash.pdf | |
21 | */ | |
22 | ||
23 | #include <linux/vmalloc.h> | |
24 | #include <linux/jhash.h> | |
25 | #include <linux/slab.h> | |
26 | #include <linux/sort.h> | |
27 | ||
28 | #include "tracing_map.h" | |
29 | #include "trace.h" | |
30 | ||
31 | /* | |
32 | * NOTE: For a detailed description of the data structures used by | |
33 | * these functions (such as tracing_map_elt) please see the overview | |
34 | * of tracing_map data structures at the beginning of tracing_map.h. | |
35 | */ | |
36 | ||
37 | /** | |
38 | * tracing_map_update_sum - Add a value to a tracing_map_elt's sum field | |
39 | * @elt: The tracing_map_elt | |
40 | * @i: The index of the given sum associated with the tracing_map_elt | |
41 | * @n: The value to add to the sum | |
42 | * | |
43 | * Add n to sum i associated with the specified tracing_map_elt | |
44 | * instance. The index i is the index returned by the call to | |
45 | * tracing_map_add_sum_field() when the tracing map was set up. | |
46 | */ | |
47 | void tracing_map_update_sum(struct tracing_map_elt *elt, unsigned int i, u64 n) | |
48 | { | |
49 | atomic64_add(n, &elt->fields[i].sum); | |
50 | } | |
51 | ||
52 | /** | |
53 | * tracing_map_read_sum - Return the value of a tracing_map_elt's sum field | |
54 | * @elt: The tracing_map_elt | |
55 | * @i: The index of the given sum associated with the tracing_map_elt | |
56 | * | |
57 | * Retrieve the value of the sum i associated with the specified | |
58 | * tracing_map_elt instance. The index i is the index returned by the | |
59 | * call to tracing_map_add_sum_field() when the tracing map was set | |
60 | * up. | |
61 | * | |
62 | * Return: The sum associated with field i for elt. | |
63 | */ | |
64 | u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i) | |
65 | { | |
66 | return (u64)atomic64_read(&elt->fields[i].sum); | |
67 | } | |
68 | ||
69 | int tracing_map_cmp_string(void *val_a, void *val_b) | |
70 | { | |
71 | char *a = val_a; | |
72 | char *b = val_b; | |
73 | ||
74 | return strcmp(a, b); | |
75 | } | |
76 | ||
77 | int tracing_map_cmp_none(void *val_a, void *val_b) | |
78 | { | |
79 | return 0; | |
80 | } | |
81 | ||
82 | static int tracing_map_cmp_atomic64(void *val_a, void *val_b) | |
83 | { | |
84 | u64 a = atomic64_read((atomic64_t *)val_a); | |
85 | u64 b = atomic64_read((atomic64_t *)val_b); | |
86 | ||
87 | return (a > b) ? 1 : ((a < b) ? -1 : 0); | |
88 | } | |
89 | ||
90 | #define DEFINE_TRACING_MAP_CMP_FN(type) \ | |
91 | static int tracing_map_cmp_##type(void *val_a, void *val_b) \ | |
92 | { \ | |
93 | type a = *(type *)val_a; \ | |
94 | type b = *(type *)val_b; \ | |
95 | \ | |
96 | return (a > b) ? 1 : ((a < b) ? -1 : 0); \ | |
97 | } | |
98 | ||
99 | DEFINE_TRACING_MAP_CMP_FN(s64); | |
100 | DEFINE_TRACING_MAP_CMP_FN(u64); | |
101 | DEFINE_TRACING_MAP_CMP_FN(s32); | |
102 | DEFINE_TRACING_MAP_CMP_FN(u32); | |
103 | DEFINE_TRACING_MAP_CMP_FN(s16); | |
104 | DEFINE_TRACING_MAP_CMP_FN(u16); | |
105 | DEFINE_TRACING_MAP_CMP_FN(s8); | |
106 | DEFINE_TRACING_MAP_CMP_FN(u8); | |
107 | ||
108 | tracing_map_cmp_fn_t tracing_map_cmp_num(int field_size, | |
109 | int field_is_signed) | |
110 | { | |
111 | tracing_map_cmp_fn_t fn = tracing_map_cmp_none; | |
112 | ||
113 | switch (field_size) { | |
114 | case 8: | |
115 | if (field_is_signed) | |
116 | fn = tracing_map_cmp_s64; | |
117 | else | |
118 | fn = tracing_map_cmp_u64; | |
119 | break; | |
120 | case 4: | |
121 | if (field_is_signed) | |
122 | fn = tracing_map_cmp_s32; | |
123 | else | |
124 | fn = tracing_map_cmp_u32; | |
125 | break; | |
126 | case 2: | |
127 | if (field_is_signed) | |
128 | fn = tracing_map_cmp_s16; | |
129 | else | |
130 | fn = tracing_map_cmp_u16; | |
131 | break; | |
132 | case 1: | |
133 | if (field_is_signed) | |
134 | fn = tracing_map_cmp_s8; | |
135 | else | |
136 | fn = tracing_map_cmp_u8; | |
137 | break; | |
138 | } | |
139 | ||
140 | return fn; | |
141 | } | |
142 | ||
143 | static int tracing_map_add_field(struct tracing_map *map, | |
144 | tracing_map_cmp_fn_t cmp_fn) | |
145 | { | |
146 | int ret = -EINVAL; | |
147 | ||
148 | if (map->n_fields < TRACING_MAP_FIELDS_MAX) { | |
149 | ret = map->n_fields; | |
150 | map->fields[map->n_fields++].cmp_fn = cmp_fn; | |
151 | } | |
152 | ||
153 | return ret; | |
154 | } | |
155 | ||
156 | /** | |
157 | * tracing_map_add_sum_field - Add a field describing a tracing_map sum | |
158 | * @map: The tracing_map | |
159 | * | |
160 | * Add a sum field to the key and return the index identifying it in | |
161 | * the map and associated tracing_map_elts. This is the index used | |
162 | * for instance to update a sum for a particular tracing_map_elt using | |
163 | * tracing_map_update_sum() or reading it via tracing_map_read_sum(). | |
164 | * | |
165 | * Return: The index identifying the field in the map and associated | |
3b772b96 | 166 | * tracing_map_elts, or -EINVAL on error. |
08d43a5f TZ |
167 | */ |
168 | int tracing_map_add_sum_field(struct tracing_map *map) | |
169 | { | |
170 | return tracing_map_add_field(map, tracing_map_cmp_atomic64); | |
171 | } | |
172 | ||
173 | /** | |
174 | * tracing_map_add_key_field - Add a field describing a tracing_map key | |
175 | * @map: The tracing_map | |
176 | * @offset: The offset within the key | |
177 | * @cmp_fn: The comparison function that will be used to sort on the key | |
178 | * | |
179 | * Let the map know there is a key and that if it's used as a sort key | |
180 | * to use cmp_fn. | |
181 | * | |
182 | * A key can be a subset of a compound key; for that purpose, the | |
183 | * offset param is used to describe where within the the compound key | |
184 | * the key referenced by this key field resides. | |
185 | * | |
186 | * Return: The index identifying the field in the map and associated | |
3b772b96 | 187 | * tracing_map_elts, or -EINVAL on error. |
08d43a5f TZ |
188 | */ |
189 | int tracing_map_add_key_field(struct tracing_map *map, | |
190 | unsigned int offset, | |
191 | tracing_map_cmp_fn_t cmp_fn) | |
192 | ||
193 | { | |
194 | int idx = tracing_map_add_field(map, cmp_fn); | |
195 | ||
196 | if (idx < 0) | |
197 | return idx; | |
198 | ||
199 | map->fields[idx].offset = offset; | |
200 | ||
201 | map->key_idx[map->n_keys++] = idx; | |
202 | ||
203 | return idx; | |
204 | } | |
205 | ||
206 | void tracing_map_array_clear(struct tracing_map_array *a) | |
207 | { | |
208 | unsigned int i; | |
209 | ||
210 | if (!a->pages) | |
211 | return; | |
212 | ||
213 | for (i = 0; i < a->n_pages; i++) | |
214 | memset(a->pages[i], 0, PAGE_SIZE); | |
215 | } | |
216 | ||
217 | void tracing_map_array_free(struct tracing_map_array *a) | |
218 | { | |
219 | unsigned int i; | |
220 | ||
221 | if (!a) | |
222 | return; | |
223 | ||
224 | if (!a->pages) { | |
225 | kfree(a); | |
226 | return; | |
227 | } | |
228 | ||
229 | for (i = 0; i < a->n_pages; i++) { | |
230 | if (!a->pages[i]) | |
231 | break; | |
232 | free_page((unsigned long)a->pages[i]); | |
233 | } | |
234 | } | |
235 | ||
236 | struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, | |
237 | unsigned int entry_size) | |
238 | { | |
239 | struct tracing_map_array *a; | |
240 | unsigned int i; | |
241 | ||
242 | a = kzalloc(sizeof(*a), GFP_KERNEL); | |
243 | if (!a) | |
244 | return NULL; | |
245 | ||
246 | a->entry_size_shift = fls(roundup_pow_of_two(entry_size) - 1); | |
247 | a->entries_per_page = PAGE_SIZE / (1 << a->entry_size_shift); | |
248 | a->n_pages = n_elts / a->entries_per_page; | |
249 | if (!a->n_pages) | |
250 | a->n_pages = 1; | |
251 | a->entry_shift = fls(a->entries_per_page) - 1; | |
252 | a->entry_mask = (1 << a->entry_shift) - 1; | |
253 | ||
254 | a->pages = kcalloc(a->n_pages, sizeof(void *), GFP_KERNEL); | |
255 | if (!a->pages) | |
256 | goto free; | |
257 | ||
258 | for (i = 0; i < a->n_pages; i++) { | |
259 | a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL); | |
260 | if (!a->pages[i]) | |
261 | goto free; | |
262 | } | |
263 | out: | |
264 | return a; | |
265 | free: | |
266 | tracing_map_array_free(a); | |
267 | a = NULL; | |
268 | ||
269 | goto out; | |
270 | } | |
271 | ||
272 | static void tracing_map_elt_clear(struct tracing_map_elt *elt) | |
273 | { | |
274 | unsigned i; | |
275 | ||
276 | for (i = 0; i < elt->map->n_fields; i++) | |
277 | if (elt->fields[i].cmp_fn == tracing_map_cmp_atomic64) | |
278 | atomic64_set(&elt->fields[i].sum, 0); | |
279 | ||
280 | if (elt->map->ops && elt->map->ops->elt_clear) | |
281 | elt->map->ops->elt_clear(elt); | |
282 | } | |
283 | ||
284 | static void tracing_map_elt_init_fields(struct tracing_map_elt *elt) | |
285 | { | |
286 | unsigned int i; | |
287 | ||
288 | tracing_map_elt_clear(elt); | |
289 | ||
290 | for (i = 0; i < elt->map->n_fields; i++) { | |
291 | elt->fields[i].cmp_fn = elt->map->fields[i].cmp_fn; | |
292 | ||
293 | if (elt->fields[i].cmp_fn != tracing_map_cmp_atomic64) | |
294 | elt->fields[i].offset = elt->map->fields[i].offset; | |
295 | } | |
296 | } | |
297 | ||
298 | static void tracing_map_elt_free(struct tracing_map_elt *elt) | |
299 | { | |
300 | if (!elt) | |
301 | return; | |
302 | ||
303 | if (elt->map->ops && elt->map->ops->elt_free) | |
304 | elt->map->ops->elt_free(elt); | |
305 | kfree(elt->fields); | |
306 | kfree(elt->key); | |
307 | kfree(elt); | |
308 | } | |
309 | ||
310 | static struct tracing_map_elt *tracing_map_elt_alloc(struct tracing_map *map) | |
311 | { | |
312 | struct tracing_map_elt *elt; | |
313 | int err = 0; | |
314 | ||
315 | elt = kzalloc(sizeof(*elt), GFP_KERNEL); | |
316 | if (!elt) | |
317 | return ERR_PTR(-ENOMEM); | |
318 | ||
319 | elt->map = map; | |
320 | ||
321 | elt->key = kzalloc(map->key_size, GFP_KERNEL); | |
322 | if (!elt->key) { | |
323 | err = -ENOMEM; | |
324 | goto free; | |
325 | } | |
326 | ||
327 | elt->fields = kcalloc(map->n_fields, sizeof(*elt->fields), GFP_KERNEL); | |
328 | if (!elt->fields) { | |
329 | err = -ENOMEM; | |
330 | goto free; | |
331 | } | |
332 | ||
333 | tracing_map_elt_init_fields(elt); | |
334 | ||
335 | if (map->ops && map->ops->elt_alloc) { | |
336 | err = map->ops->elt_alloc(elt); | |
337 | if (err) | |
338 | goto free; | |
339 | } | |
340 | return elt; | |
341 | free: | |
342 | tracing_map_elt_free(elt); | |
343 | ||
344 | return ERR_PTR(err); | |
345 | } | |
346 | ||
347 | static struct tracing_map_elt *get_free_elt(struct tracing_map *map) | |
348 | { | |
349 | struct tracing_map_elt *elt = NULL; | |
350 | int idx; | |
351 | ||
352 | idx = atomic_inc_return(&map->next_elt); | |
353 | if (idx < map->max_elts) { | |
354 | elt = *(TRACING_MAP_ELT(map->elts, idx)); | |
355 | if (map->ops && map->ops->elt_init) | |
356 | map->ops->elt_init(elt); | |
357 | } | |
358 | ||
359 | return elt; | |
360 | } | |
361 | ||
362 | static void tracing_map_free_elts(struct tracing_map *map) | |
363 | { | |
364 | unsigned int i; | |
365 | ||
366 | if (!map->elts) | |
367 | return; | |
368 | ||
6e4cf657 | 369 | for (i = 0; i < map->max_elts; i++) { |
08d43a5f | 370 | tracing_map_elt_free(*(TRACING_MAP_ELT(map->elts, i))); |
6e4cf657 TZ |
371 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; |
372 | } | |
08d43a5f TZ |
373 | |
374 | tracing_map_array_free(map->elts); | |
6e4cf657 | 375 | map->elts = NULL; |
08d43a5f TZ |
376 | } |
377 | ||
378 | static int tracing_map_alloc_elts(struct tracing_map *map) | |
379 | { | |
380 | unsigned int i; | |
381 | ||
382 | map->elts = tracing_map_array_alloc(map->max_elts, | |
383 | sizeof(struct tracing_map_elt *)); | |
384 | if (!map->elts) | |
385 | return -ENOMEM; | |
386 | ||
387 | for (i = 0; i < map->max_elts; i++) { | |
388 | *(TRACING_MAP_ELT(map->elts, i)) = tracing_map_elt_alloc(map); | |
6e4cf657 TZ |
389 | if (IS_ERR(*(TRACING_MAP_ELT(map->elts, i)))) { |
390 | *(TRACING_MAP_ELT(map->elts, i)) = NULL; | |
08d43a5f TZ |
391 | tracing_map_free_elts(map); |
392 | ||
393 | return -ENOMEM; | |
394 | } | |
395 | } | |
396 | ||
397 | return 0; | |
398 | } | |
399 | ||
400 | static inline bool keys_match(void *key, void *test_key, unsigned key_size) | |
401 | { | |
402 | bool match = true; | |
403 | ||
404 | if (memcmp(key, test_key, key_size)) | |
405 | match = false; | |
406 | ||
407 | return match; | |
408 | } | |
409 | ||
410 | static inline struct tracing_map_elt * | |
411 | __tracing_map_insert(struct tracing_map *map, void *key, bool lookup_only) | |
412 | { | |
413 | u32 idx, key_hash, test_key; | |
414 | struct tracing_map_entry *entry; | |
415 | ||
416 | key_hash = jhash(key, map->key_size, 0); | |
417 | if (key_hash == 0) | |
418 | key_hash = 1; | |
419 | idx = key_hash >> (32 - (map->map_bits + 1)); | |
420 | ||
421 | while (1) { | |
422 | idx &= (map->map_size - 1); | |
423 | entry = TRACING_MAP_ENTRY(map->map, idx); | |
424 | test_key = entry->key; | |
425 | ||
426 | if (test_key && test_key == key_hash && entry->val && | |
427 | keys_match(key, entry->val->key, map->key_size)) { | |
428 | atomic64_inc(&map->hits); | |
429 | return entry->val; | |
430 | } | |
431 | ||
432 | if (!test_key) { | |
433 | if (lookup_only) | |
434 | break; | |
435 | ||
436 | if (!cmpxchg(&entry->key, 0, key_hash)) { | |
437 | struct tracing_map_elt *elt; | |
438 | ||
439 | elt = get_free_elt(map); | |
440 | if (!elt) { | |
441 | atomic64_inc(&map->drops); | |
442 | entry->key = 0; | |
443 | break; | |
444 | } | |
445 | ||
446 | memcpy(elt->key, key, map->key_size); | |
447 | entry->val = elt; | |
448 | atomic64_inc(&map->hits); | |
449 | ||
450 | return entry->val; | |
451 | } | |
452 | } | |
453 | ||
454 | idx++; | |
455 | } | |
456 | ||
457 | return NULL; | |
458 | } | |
459 | ||
460 | /** | |
461 | * tracing_map_insert - Insert key and/or retrieve val from a tracing_map | |
462 | * @map: The tracing_map to insert into | |
463 | * @key: The key to insert | |
464 | * | |
465 | * Inserts a key into a tracing_map and creates and returns a new | |
466 | * tracing_map_elt for it, or if the key has already been inserted by | |
467 | * a previous call, returns the tracing_map_elt already associated | |
468 | * with it. When the map was created, the number of elements to be | |
469 | * allocated for the map was specified (internally maintained as | |
470 | * 'max_elts' in struct tracing_map), and that number of | |
471 | * tracing_map_elts was created by tracing_map_init(). This is the | |
472 | * pre-allocated pool of tracing_map_elts that tracing_map_insert() | |
473 | * will allocate from when adding new keys. Once that pool is | |
474 | * exhausted, tracing_map_insert() is useless and will return NULL to | |
475 | * signal that state. There are two user-visible tracing_map | |
476 | * variables, 'hits' and 'drops', which are updated by this function. | |
477 | * Every time an element is either successfully inserted or retrieved, | |
478 | * the 'hits' value is incrememented. Every time an element insertion | |
479 | * fails, the 'drops' value is incremented. | |
480 | * | |
481 | * This is a lock-free tracing map insertion function implementing a | |
482 | * modified form of Cliff Click's basic insertion algorithm. It | |
483 | * requires the table size be a power of two. To prevent any | |
484 | * possibility of an infinite loop we always make the internal table | |
485 | * size double the size of the requested table size (max_elts * 2). | |
486 | * Likewise, we never reuse a slot or resize or delete elements - when | |
487 | * we've reached max_elts entries, we simply return NULL once we've | |
488 | * run out of entries. Readers can at any point in time traverse the | |
489 | * tracing map and safely access the key/val pairs. | |
490 | * | |
491 | * Return: the tracing_map_elt pointer val associated with the key. | |
492 | * If this was a newly inserted key, the val will be a newly allocated | |
493 | * and associated tracing_map_elt pointer val. If the key wasn't | |
494 | * found and the pool of tracing_map_elts has been exhausted, NULL is | |
495 | * returned and no further insertions will succeed. | |
496 | */ | |
497 | struct tracing_map_elt *tracing_map_insert(struct tracing_map *map, void *key) | |
498 | { | |
499 | return __tracing_map_insert(map, key, false); | |
500 | } | |
501 | ||
502 | /** | |
503 | * tracing_map_lookup - Retrieve val from a tracing_map | |
504 | * @map: The tracing_map to perform the lookup on | |
505 | * @key: The key to look up | |
506 | * | |
507 | * Looks up key in tracing_map and if found returns the matching | |
508 | * tracing_map_elt. This is a lock-free lookup; see | |
509 | * tracing_map_insert() for details on tracing_map and how it works. | |
510 | * Every time an element is retrieved, the 'hits' value is | |
511 | * incrememented. There is one user-visible tracing_map variable, | |
512 | * 'hits', which is updated by this function. Every time an element | |
513 | * is successfully retrieved, the 'hits' value is incrememented. The | |
514 | * 'drops' value is never updated by this function. | |
515 | * | |
516 | * Return: the tracing_map_elt pointer val associated with the key. | |
517 | * If the key wasn't found, NULL is returned. | |
518 | */ | |
519 | struct tracing_map_elt *tracing_map_lookup(struct tracing_map *map, void *key) | |
520 | { | |
521 | return __tracing_map_insert(map, key, true); | |
522 | } | |
523 | ||
524 | /** | |
525 | * tracing_map_destroy - Destroy a tracing_map | |
526 | * @map: The tracing_map to destroy | |
527 | * | |
528 | * Frees a tracing_map along with its associated array of | |
529 | * tracing_map_elts. | |
530 | * | |
531 | * Callers should make sure there are no readers or writers actively | |
532 | * reading or inserting into the map before calling this. | |
533 | */ | |
534 | void tracing_map_destroy(struct tracing_map *map) | |
535 | { | |
536 | if (!map) | |
537 | return; | |
538 | ||
539 | tracing_map_free_elts(map); | |
540 | ||
541 | tracing_map_array_free(map->map); | |
542 | kfree(map); | |
543 | } | |
544 | ||
545 | /** | |
546 | * tracing_map_clear - Clear a tracing_map | |
547 | * @map: The tracing_map to clear | |
548 | * | |
549 | * Resets the tracing map to a cleared or initial state. The | |
550 | * tracing_map_elts are all cleared, and the array of struct | |
551 | * tracing_map_entry is reset to an initialized state. | |
552 | * | |
553 | * Callers should make sure there are no writers actively inserting | |
554 | * into the map before calling this. | |
555 | */ | |
556 | void tracing_map_clear(struct tracing_map *map) | |
557 | { | |
558 | unsigned int i; | |
559 | ||
560 | atomic_set(&map->next_elt, -1); | |
561 | atomic64_set(&map->hits, 0); | |
562 | atomic64_set(&map->drops, 0); | |
563 | ||
564 | tracing_map_array_clear(map->map); | |
565 | ||
566 | for (i = 0; i < map->max_elts; i++) | |
567 | tracing_map_elt_clear(*(TRACING_MAP_ELT(map->elts, i))); | |
568 | } | |
569 | ||
570 | static void set_sort_key(struct tracing_map *map, | |
571 | struct tracing_map_sort_key *sort_key) | |
572 | { | |
573 | map->sort_key = *sort_key; | |
574 | } | |
575 | ||
576 | /** | |
577 | * tracing_map_create - Create a lock-free map and element pool | |
578 | * @map_bits: The size of the map (2 ** map_bits) | |
579 | * @key_size: The size of the key for the map in bytes | |
580 | * @ops: Optional client-defined tracing_map_ops instance | |
581 | * @private_data: Client data associated with the map | |
582 | * | |
583 | * Creates and sets up a map to contain 2 ** map_bits number of | |
584 | * elements (internally maintained as 'max_elts' in struct | |
585 | * tracing_map). Before using, map fields should be added to the map | |
586 | * with tracing_map_add_sum_field() and tracing_map_add_key_field(). | |
587 | * tracing_map_init() should then be called to allocate the array of | |
588 | * tracing_map_elts, in order to avoid allocating anything in the map | |
589 | * insertion path. The user-specified map size reflects the maximum | |
590 | * number of elements that can be contained in the table requested by | |
591 | * the user - internally we double that in order to keep the table | |
592 | * sparse and keep collisions manageable. | |
593 | * | |
594 | * A tracing_map is a special-purpose map designed to aggregate or | |
595 | * 'sum' one or more values associated with a specific object of type | |
596 | * tracing_map_elt, which is attached by the map to a given key. | |
597 | * | |
598 | * tracing_map_create() sets up the map itself, and provides | |
599 | * operations for inserting tracing_map_elts, but doesn't allocate the | |
600 | * tracing_map_elts themselves, or provide a means for describing the | |
601 | * keys or sums associated with the tracing_map_elts. All | |
602 | * tracing_map_elts for a given map have the same set of sums and | |
603 | * keys, which are defined by the client using the functions | |
604 | * tracing_map_add_key_field() and tracing_map_add_sum_field(). Once | |
605 | * the fields are defined, the pool of elements allocated for the map | |
606 | * can be created, which occurs when the client code calls | |
607 | * tracing_map_init(). | |
608 | * | |
609 | * When tracing_map_init() returns, tracing_map_elt elements can be | |
610 | * inserted into the map using tracing_map_insert(). When called, | |
611 | * tracing_map_insert() grabs a free tracing_map_elt from the pool, or | |
612 | * finds an existing match in the map and in either case returns it. | |
613 | * The client can then use tracing_map_update_sum() and | |
614 | * tracing_map_read_sum() to update or read a given sum field for the | |
615 | * tracing_map_elt. | |
616 | * | |
617 | * The client can at any point retrieve and traverse the current set | |
618 | * of inserted tracing_map_elts in a tracing_map, via | |
619 | * tracing_map_sort_entries(). Sorting can be done on any field, | |
620 | * including keys. | |
621 | * | |
622 | * See tracing_map.h for a description of tracing_map_ops. | |
623 | * | |
624 | * Return: the tracing_map pointer if successful, ERR_PTR if not. | |
625 | */ | |
626 | struct tracing_map *tracing_map_create(unsigned int map_bits, | |
627 | unsigned int key_size, | |
628 | const struct tracing_map_ops *ops, | |
629 | void *private_data) | |
630 | { | |
631 | struct tracing_map *map; | |
632 | unsigned int i; | |
633 | ||
634 | if (map_bits < TRACING_MAP_BITS_MIN || | |
635 | map_bits > TRACING_MAP_BITS_MAX) | |
636 | return ERR_PTR(-EINVAL); | |
637 | ||
638 | map = kzalloc(sizeof(*map), GFP_KERNEL); | |
639 | if (!map) | |
640 | return ERR_PTR(-ENOMEM); | |
641 | ||
642 | map->map_bits = map_bits; | |
643 | map->max_elts = (1 << map_bits); | |
644 | atomic_set(&map->next_elt, -1); | |
645 | ||
646 | map->map_size = (1 << (map_bits + 1)); | |
647 | map->ops = ops; | |
648 | ||
649 | map->private_data = private_data; | |
650 | ||
651 | map->map = tracing_map_array_alloc(map->map_size, | |
652 | sizeof(struct tracing_map_entry)); | |
653 | if (!map->map) | |
654 | goto free; | |
655 | ||
656 | map->key_size = key_size; | |
657 | for (i = 0; i < TRACING_MAP_KEYS_MAX; i++) | |
658 | map->key_idx[i] = -1; | |
659 | out: | |
660 | return map; | |
661 | free: | |
662 | tracing_map_destroy(map); | |
663 | map = ERR_PTR(-ENOMEM); | |
664 | ||
665 | goto out; | |
666 | } | |
667 | ||
668 | /** | |
669 | * tracing_map_init - Allocate and clear a map's tracing_map_elts | |
670 | * @map: The tracing_map to initialize | |
671 | * | |
672 | * Allocates a clears a pool of tracing_map_elts equal to the | |
673 | * user-specified size of 2 ** map_bits (internally maintained as | |
674 | * 'max_elts' in struct tracing_map). Before using, the map fields | |
675 | * should be added to the map with tracing_map_add_sum_field() and | |
676 | * tracing_map_add_key_field(). tracing_map_init() should then be | |
677 | * called to allocate the array of tracing_map_elts, in order to avoid | |
678 | * allocating anything in the map insertion path. The user-specified | |
679 | * map size reflects the max number of elements requested by the user | |
680 | * - internally we double that in order to keep the table sparse and | |
681 | * keep collisions manageable. | |
682 | * | |
683 | * See tracing_map.h for a description of tracing_map_ops. | |
684 | * | |
685 | * Return: the tracing_map pointer if successful, ERR_PTR if not. | |
686 | */ | |
687 | int tracing_map_init(struct tracing_map *map) | |
688 | { | |
689 | int err; | |
690 | ||
691 | if (map->n_fields < 2) | |
692 | return -EINVAL; /* need at least 1 key and 1 val */ | |
693 | ||
694 | err = tracing_map_alloc_elts(map); | |
695 | if (err) | |
696 | return err; | |
697 | ||
698 | tracing_map_clear(map); | |
699 | ||
700 | return err; | |
701 | } | |
702 | ||
703 | static int cmp_entries_dup(const struct tracing_map_sort_entry **a, | |
704 | const struct tracing_map_sort_entry **b) | |
705 | { | |
706 | int ret = 0; | |
707 | ||
708 | if (memcmp((*a)->key, (*b)->key, (*a)->elt->map->key_size)) | |
709 | ret = 1; | |
710 | ||
711 | return ret; | |
712 | } | |
713 | ||
714 | static int cmp_entries_sum(const struct tracing_map_sort_entry **a, | |
715 | const struct tracing_map_sort_entry **b) | |
716 | { | |
717 | const struct tracing_map_elt *elt_a, *elt_b; | |
718 | struct tracing_map_sort_key *sort_key; | |
719 | struct tracing_map_field *field; | |
720 | tracing_map_cmp_fn_t cmp_fn; | |
721 | void *val_a, *val_b; | |
722 | int ret = 0; | |
723 | ||
724 | elt_a = (*a)->elt; | |
725 | elt_b = (*b)->elt; | |
726 | ||
727 | sort_key = &elt_a->map->sort_key; | |
728 | ||
729 | field = &elt_a->fields[sort_key->field_idx]; | |
730 | cmp_fn = field->cmp_fn; | |
731 | ||
732 | val_a = &elt_a->fields[sort_key->field_idx].sum; | |
733 | val_b = &elt_b->fields[sort_key->field_idx].sum; | |
734 | ||
735 | ret = cmp_fn(val_a, val_b); | |
736 | if (sort_key->descending) | |
737 | ret = -ret; | |
738 | ||
739 | return ret; | |
740 | } | |
741 | ||
742 | static int cmp_entries_key(const struct tracing_map_sort_entry **a, | |
743 | const struct tracing_map_sort_entry **b) | |
744 | { | |
745 | const struct tracing_map_elt *elt_a, *elt_b; | |
746 | struct tracing_map_sort_key *sort_key; | |
747 | struct tracing_map_field *field; | |
748 | tracing_map_cmp_fn_t cmp_fn; | |
749 | void *val_a, *val_b; | |
750 | int ret = 0; | |
751 | ||
752 | elt_a = (*a)->elt; | |
753 | elt_b = (*b)->elt; | |
754 | ||
755 | sort_key = &elt_a->map->sort_key; | |
756 | ||
757 | field = &elt_a->fields[sort_key->field_idx]; | |
758 | ||
759 | cmp_fn = field->cmp_fn; | |
760 | ||
761 | val_a = elt_a->key + field->offset; | |
762 | val_b = elt_b->key + field->offset; | |
763 | ||
764 | ret = cmp_fn(val_a, val_b); | |
765 | if (sort_key->descending) | |
766 | ret = -ret; | |
767 | ||
768 | return ret; | |
769 | } | |
770 | ||
771 | static void destroy_sort_entry(struct tracing_map_sort_entry *entry) | |
772 | { | |
773 | if (!entry) | |
774 | return; | |
775 | ||
776 | if (entry->elt_copied) | |
777 | tracing_map_elt_free(entry->elt); | |
778 | ||
779 | kfree(entry); | |
780 | } | |
781 | ||
782 | /** | |
783 | * tracing_map_destroy_sort_entries - Destroy an array of sort entries | |
784 | * @entries: The entries to destroy | |
785 | * @n_entries: The number of entries in the array | |
786 | * | |
787 | * Destroy the elements returned by a tracing_map_sort_entries() call. | |
788 | */ | |
789 | void tracing_map_destroy_sort_entries(struct tracing_map_sort_entry **entries, | |
790 | unsigned int n_entries) | |
791 | { | |
792 | unsigned int i; | |
793 | ||
794 | for (i = 0; i < n_entries; i++) | |
795 | destroy_sort_entry(entries[i]); | |
796 | ||
797 | vfree(entries); | |
798 | } | |
799 | ||
800 | static struct tracing_map_sort_entry * | |
801 | create_sort_entry(void *key, struct tracing_map_elt *elt) | |
802 | { | |
803 | struct tracing_map_sort_entry *sort_entry; | |
804 | ||
805 | sort_entry = kzalloc(sizeof(*sort_entry), GFP_KERNEL); | |
806 | if (!sort_entry) | |
807 | return NULL; | |
808 | ||
809 | sort_entry->key = key; | |
810 | sort_entry->elt = elt; | |
811 | ||
812 | return sort_entry; | |
813 | } | |
814 | ||
815 | static struct tracing_map_elt *copy_elt(struct tracing_map_elt *elt) | |
816 | { | |
817 | struct tracing_map_elt *dup_elt; | |
818 | unsigned int i; | |
819 | ||
820 | dup_elt = tracing_map_elt_alloc(elt->map); | |
4812952f | 821 | if (IS_ERR(dup_elt)) |
08d43a5f TZ |
822 | return NULL; |
823 | ||
824 | if (elt->map->ops && elt->map->ops->elt_copy) | |
825 | elt->map->ops->elt_copy(dup_elt, elt); | |
826 | ||
827 | dup_elt->private_data = elt->private_data; | |
828 | memcpy(dup_elt->key, elt->key, elt->map->key_size); | |
829 | ||
830 | for (i = 0; i < elt->map->n_fields; i++) { | |
831 | atomic64_set(&dup_elt->fields[i].sum, | |
832 | atomic64_read(&elt->fields[i].sum)); | |
833 | dup_elt->fields[i].cmp_fn = elt->fields[i].cmp_fn; | |
834 | } | |
835 | ||
836 | return dup_elt; | |
837 | } | |
838 | ||
839 | static int merge_dup(struct tracing_map_sort_entry **sort_entries, | |
840 | unsigned int target, unsigned int dup) | |
841 | { | |
842 | struct tracing_map_elt *target_elt, *elt; | |
843 | bool first_dup = (target - dup) == 1; | |
844 | int i; | |
845 | ||
846 | if (first_dup) { | |
847 | elt = sort_entries[target]->elt; | |
848 | target_elt = copy_elt(elt); | |
849 | if (!target_elt) | |
850 | return -ENOMEM; | |
851 | sort_entries[target]->elt = target_elt; | |
852 | sort_entries[target]->elt_copied = true; | |
853 | } else | |
854 | target_elt = sort_entries[target]->elt; | |
855 | ||
856 | elt = sort_entries[dup]->elt; | |
857 | ||
858 | for (i = 0; i < elt->map->n_fields; i++) | |
859 | atomic64_add(atomic64_read(&elt->fields[i].sum), | |
860 | &target_elt->fields[i].sum); | |
861 | ||
862 | sort_entries[dup]->dup = true; | |
863 | ||
864 | return 0; | |
865 | } | |
866 | ||
867 | static int merge_dups(struct tracing_map_sort_entry **sort_entries, | |
868 | int n_entries, unsigned int key_size) | |
869 | { | |
870 | unsigned int dups = 0, total_dups = 0; | |
871 | int err, i, j; | |
872 | void *key; | |
873 | ||
874 | if (n_entries < 2) | |
875 | return total_dups; | |
876 | ||
877 | sort(sort_entries, n_entries, sizeof(struct tracing_map_sort_entry *), | |
878 | (int (*)(const void *, const void *))cmp_entries_dup, NULL); | |
879 | ||
880 | key = sort_entries[0]->key; | |
881 | for (i = 1; i < n_entries; i++) { | |
882 | if (!memcmp(sort_entries[i]->key, key, key_size)) { | |
883 | dups++; total_dups++; | |
884 | err = merge_dup(sort_entries, i - dups, i); | |
885 | if (err) | |
886 | return err; | |
887 | continue; | |
888 | } | |
889 | key = sort_entries[i]->key; | |
890 | dups = 0; | |
891 | } | |
892 | ||
893 | if (!total_dups) | |
894 | return total_dups; | |
895 | ||
896 | for (i = 0, j = 0; i < n_entries; i++) { | |
897 | if (!sort_entries[i]->dup) { | |
898 | sort_entries[j] = sort_entries[i]; | |
899 | if (j++ != i) | |
900 | sort_entries[i] = NULL; | |
901 | } else { | |
902 | destroy_sort_entry(sort_entries[i]); | |
903 | sort_entries[i] = NULL; | |
904 | } | |
905 | } | |
906 | ||
907 | return total_dups; | |
908 | } | |
909 | ||
910 | static bool is_key(struct tracing_map *map, unsigned int field_idx) | |
911 | { | |
912 | unsigned int i; | |
913 | ||
914 | for (i = 0; i < map->n_keys; i++) | |
915 | if (map->key_idx[i] == field_idx) | |
916 | return true; | |
917 | return false; | |
918 | } | |
919 | ||
920 | static void sort_secondary(struct tracing_map *map, | |
921 | const struct tracing_map_sort_entry **entries, | |
922 | unsigned int n_entries, | |
923 | struct tracing_map_sort_key *primary_key, | |
924 | struct tracing_map_sort_key *secondary_key) | |
925 | { | |
926 | int (*primary_fn)(const struct tracing_map_sort_entry **, | |
927 | const struct tracing_map_sort_entry **); | |
928 | int (*secondary_fn)(const struct tracing_map_sort_entry **, | |
929 | const struct tracing_map_sort_entry **); | |
930 | unsigned i, start = 0, n_sub = 1; | |
931 | ||
932 | if (is_key(map, primary_key->field_idx)) | |
933 | primary_fn = cmp_entries_key; | |
934 | else | |
935 | primary_fn = cmp_entries_sum; | |
936 | ||
937 | if (is_key(map, secondary_key->field_idx)) | |
938 | secondary_fn = cmp_entries_key; | |
939 | else | |
940 | secondary_fn = cmp_entries_sum; | |
941 | ||
942 | for (i = 0; i < n_entries - 1; i++) { | |
943 | const struct tracing_map_sort_entry **a = &entries[i]; | |
944 | const struct tracing_map_sort_entry **b = &entries[i + 1]; | |
945 | ||
946 | if (primary_fn(a, b) == 0) { | |
947 | n_sub++; | |
948 | if (i < n_entries - 2) | |
949 | continue; | |
950 | } | |
951 | ||
952 | if (n_sub < 2) { | |
953 | start = i + 1; | |
954 | n_sub = 1; | |
955 | continue; | |
956 | } | |
957 | ||
958 | set_sort_key(map, secondary_key); | |
959 | sort(&entries[start], n_sub, | |
960 | sizeof(struct tracing_map_sort_entry *), | |
961 | (int (*)(const void *, const void *))secondary_fn, NULL); | |
962 | set_sort_key(map, primary_key); | |
963 | ||
964 | start = i + 1; | |
965 | n_sub = 1; | |
966 | } | |
967 | } | |
968 | ||
969 | /** | |
970 | * tracing_map_sort_entries - Sort the current set of tracing_map_elts in a map | |
971 | * @map: The tracing_map | |
972 | * @sort_key: The sort key to use for sorting | |
973 | * @sort_entries: outval: pointer to allocated and sorted array of entries | |
974 | * | |
975 | * tracing_map_sort_entries() sorts the current set of entries in the | |
976 | * map and returns the list of tracing_map_sort_entries containing | |
977 | * them to the client in the sort_entries param. The client can | |
978 | * access the struct tracing_map_elt element of interest directly as | |
979 | * the 'elt' field of a returned struct tracing_map_sort_entry object. | |
980 | * | |
981 | * The sort_key has only two fields: idx and descending. 'idx' refers | |
982 | * to the index of the field added via tracing_map_add_sum_field() or | |
983 | * tracing_map_add_key_field() when the tracing_map was initialized. | |
984 | * 'descending' is a flag that if set reverses the sort order, which | |
985 | * by default is ascending. | |
986 | * | |
987 | * The client should not hold on to the returned array but should use | |
988 | * it and call tracing_map_destroy_sort_entries() when done. | |
989 | * | |
990 | * Return: the number of sort_entries in the struct tracing_map_sort_entry | |
991 | * array, negative on error | |
992 | */ | |
993 | int tracing_map_sort_entries(struct tracing_map *map, | |
994 | struct tracing_map_sort_key *sort_keys, | |
995 | unsigned int n_sort_keys, | |
996 | struct tracing_map_sort_entry ***sort_entries) | |
997 | { | |
998 | int (*cmp_entries_fn)(const struct tracing_map_sort_entry **, | |
999 | const struct tracing_map_sort_entry **); | |
1000 | struct tracing_map_sort_entry *sort_entry, **entries; | |
1001 | int i, n_entries, ret; | |
1002 | ||
1003 | entries = vmalloc(map->max_elts * sizeof(sort_entry)); | |
1004 | if (!entries) | |
1005 | return -ENOMEM; | |
1006 | ||
1007 | for (i = 0, n_entries = 0; i < map->map_size; i++) { | |
1008 | struct tracing_map_entry *entry; | |
1009 | ||
1010 | entry = TRACING_MAP_ENTRY(map->map, i); | |
1011 | ||
1012 | if (!entry->key || !entry->val) | |
1013 | continue; | |
1014 | ||
1015 | entries[n_entries] = create_sort_entry(entry->val->key, | |
1016 | entry->val); | |
1017 | if (!entries[n_entries++]) { | |
1018 | ret = -ENOMEM; | |
1019 | goto free; | |
1020 | } | |
1021 | } | |
1022 | ||
1023 | if (n_entries == 0) { | |
1024 | ret = 0; | |
1025 | goto free; | |
1026 | } | |
1027 | ||
1028 | if (n_entries == 1) { | |
1029 | *sort_entries = entries; | |
1030 | return 1; | |
1031 | } | |
1032 | ||
1033 | ret = merge_dups(entries, n_entries, map->key_size); | |
1034 | if (ret < 0) | |
1035 | goto free; | |
1036 | n_entries -= ret; | |
1037 | ||
1038 | if (is_key(map, sort_keys[0].field_idx)) | |
1039 | cmp_entries_fn = cmp_entries_key; | |
1040 | else | |
1041 | cmp_entries_fn = cmp_entries_sum; | |
1042 | ||
1043 | set_sort_key(map, &sort_keys[0]); | |
1044 | ||
1045 | sort(entries, n_entries, sizeof(struct tracing_map_sort_entry *), | |
1046 | (int (*)(const void *, const void *))cmp_entries_fn, NULL); | |
1047 | ||
1048 | if (n_sort_keys > 1) | |
1049 | sort_secondary(map, | |
1050 | (const struct tracing_map_sort_entry **)entries, | |
1051 | n_entries, | |
1052 | &sort_keys[0], | |
1053 | &sort_keys[1]); | |
1054 | ||
1055 | *sort_entries = entries; | |
1056 | ||
1057 | return n_entries; | |
1058 | free: | |
1059 | tracing_map_destroy_sort_entries(entries, n_entries); | |
1060 | ||
1061 | return ret; | |
1062 | } |