Commit | Line | Data |
---|---|---|
25763b3c | 1 | // SPDX-License-Identifier: GPL-2.0-only |
5db58faf MKL |
2 | /* |
3 | * Copyright (c) 2016 Facebook | |
5db58faf MKL |
4 | */ |
5 | #define _GNU_SOURCE | |
6 | #include <linux/types.h> | |
7 | #include <stdio.h> | |
8 | #include <unistd.h> | |
9 | #include <linux/bpf.h> | |
10 | #include <errno.h> | |
11 | #include <string.h> | |
12 | #include <assert.h> | |
13 | #include <sched.h> | |
14 | #include <sys/wait.h> | |
15 | #include <sys/stat.h> | |
e00c7b21 | 16 | #include <sys/resource.h> |
5db58faf MKL |
17 | #include <fcntl.h> |
18 | #include <stdlib.h> | |
19 | #include <time.h> | |
e00c7b21 | 20 | |
2bf3e2ef | 21 | #include <bpf/bpf.h> |
e00c7b21 | 22 | #include "bpf_util.h" |
5db58faf MKL |
23 | |
24 | #define min(a, b) ((a) < (b) ? (a) : (b)) | |
dfc5be0d AA |
25 | #ifndef offsetof |
26 | # define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) | |
27 | #endif | |
5db58faf MKL |
28 | #define container_of(ptr, type, member) ({ \ |
29 | const typeof( ((type *)0)->member ) *__mptr = (ptr); \ | |
30 | (type *)( (char *)__mptr - offsetof(type,member) );}) | |
31 | ||
32 | static int nr_cpus; | |
33 | static unsigned long long *dist_keys; | |
34 | static unsigned int dist_key_counts; | |
35 | ||
36 | struct list_head { | |
37 | struct list_head *next, *prev; | |
38 | }; | |
39 | ||
40 | static inline void INIT_LIST_HEAD(struct list_head *list) | |
41 | { | |
42 | list->next = list; | |
43 | list->prev = list; | |
44 | } | |
45 | ||
46 | static inline int list_empty(const struct list_head *head) | |
47 | { | |
48 | return head->next == head; | |
49 | } | |
50 | ||
51 | static inline void __list_add(struct list_head *new, | |
52 | struct list_head *prev, | |
53 | struct list_head *next) | |
54 | { | |
55 | next->prev = new; | |
56 | new->next = next; | |
57 | new->prev = prev; | |
58 | prev->next = new; | |
59 | } | |
60 | ||
61 | static inline void list_add(struct list_head *new, struct list_head *head) | |
62 | { | |
63 | __list_add(new, head, head->next); | |
64 | } | |
65 | ||
66 | static inline void __list_del(struct list_head *prev, struct list_head *next) | |
67 | { | |
68 | next->prev = prev; | |
69 | prev->next = next; | |
70 | } | |
71 | ||
72 | static inline void __list_del_entry(struct list_head *entry) | |
73 | { | |
74 | __list_del(entry->prev, entry->next); | |
75 | } | |
76 | ||
77 | static inline void list_move(struct list_head *list, struct list_head *head) | |
78 | { | |
79 | __list_del_entry(list); | |
80 | list_add(list, head); | |
81 | } | |
82 | ||
83 | #define list_entry(ptr, type, member) \ | |
84 | container_of(ptr, type, member) | |
85 | ||
86 | #define list_last_entry(ptr, type, member) \ | |
87 | list_entry((ptr)->prev, type, member) | |
88 | ||
89 | struct pfect_lru_node { | |
90 | struct list_head list; | |
91 | unsigned long long key; | |
92 | }; | |
93 | ||
94 | struct pfect_lru { | |
95 | struct list_head list; | |
96 | struct pfect_lru_node *free_nodes; | |
97 | unsigned int cur_size; | |
98 | unsigned int lru_size; | |
99 | unsigned int nr_unique; | |
100 | unsigned int nr_misses; | |
101 | unsigned int total; | |
102 | int map_fd; | |
103 | }; | |
104 | ||
105 | static void pfect_lru_init(struct pfect_lru *lru, unsigned int lru_size, | |
106 | unsigned int nr_possible_elems) | |
107 | { | |
108 | lru->map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, | |
109 | sizeof(unsigned long long), | |
110 | sizeof(struct pfect_lru_node *), | |
111 | nr_possible_elems, 0); | |
112 | assert(lru->map_fd != -1); | |
113 | ||
114 | lru->free_nodes = malloc(lru_size * sizeof(struct pfect_lru_node)); | |
115 | assert(lru->free_nodes); | |
116 | ||
117 | INIT_LIST_HEAD(&lru->list); | |
118 | lru->cur_size = 0; | |
119 | lru->lru_size = lru_size; | |
120 | lru->nr_unique = lru->nr_misses = lru->total = 0; | |
121 | } | |
122 | ||
123 | static void pfect_lru_destroy(struct pfect_lru *lru) | |
124 | { | |
125 | close(lru->map_fd); | |
126 | free(lru->free_nodes); | |
127 | } | |
128 | ||
129 | static int pfect_lru_lookup_or_insert(struct pfect_lru *lru, | |
130 | unsigned long long key) | |
131 | { | |
132 | struct pfect_lru_node *node = NULL; | |
133 | int seen = 0; | |
134 | ||
135 | lru->total++; | |
d40fc181 | 136 | if (!bpf_map_lookup_elem(lru->map_fd, &key, &node)) { |
5db58faf MKL |
137 | if (node) { |
138 | list_move(&node->list, &lru->list); | |
139 | return 1; | |
140 | } | |
141 | seen = 1; | |
142 | } | |
143 | ||
144 | if (lru->cur_size < lru->lru_size) { | |
145 | node = &lru->free_nodes[lru->cur_size++]; | |
146 | INIT_LIST_HEAD(&node->list); | |
147 | } else { | |
148 | struct pfect_lru_node *null_node = NULL; | |
149 | ||
150 | node = list_last_entry(&lru->list, | |
151 | struct pfect_lru_node, | |
152 | list); | |
d40fc181 | 153 | bpf_map_update_elem(lru->map_fd, &node->key, &null_node, BPF_EXIST); |
5db58faf MKL |
154 | } |
155 | ||
156 | node->key = key; | |
157 | list_move(&node->list, &lru->list); | |
158 | ||
159 | lru->nr_misses++; | |
160 | if (seen) { | |
d40fc181 | 161 | assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_EXIST)); |
5db58faf MKL |
162 | } else { |
163 | lru->nr_unique++; | |
d40fc181 | 164 | assert(!bpf_map_update_elem(lru->map_fd, &key, &node, BPF_NOEXIST)); |
5db58faf MKL |
165 | } |
166 | ||
167 | return seen; | |
168 | } | |
169 | ||
170 | static unsigned int read_keys(const char *dist_file, | |
171 | unsigned long long **keys) | |
172 | { | |
173 | struct stat fst; | |
174 | unsigned long long *retkeys; | |
175 | unsigned int counts = 0; | |
176 | int dist_fd; | |
177 | char *b, *l; | |
178 | int i; | |
179 | ||
180 | dist_fd = open(dist_file, 0); | |
181 | assert(dist_fd != -1); | |
182 | ||
183 | assert(fstat(dist_fd, &fst) == 0); | |
184 | b = malloc(fst.st_size); | |
185 | assert(b); | |
186 | ||
187 | assert(read(dist_fd, b, fst.st_size) == fst.st_size); | |
188 | close(dist_fd); | |
189 | for (i = 0; i < fst.st_size; i++) { | |
190 | if (b[i] == '\n') | |
191 | counts++; | |
192 | } | |
193 | counts++; /* in case the last line has no \n */ | |
194 | ||
195 | retkeys = malloc(counts * sizeof(unsigned long long)); | |
196 | assert(retkeys); | |
197 | ||
198 | counts = 0; | |
199 | for (l = strtok(b, "\n"); l; l = strtok(NULL, "\n")) | |
200 | retkeys[counts++] = strtoull(l, NULL, 10); | |
201 | free(b); | |
202 | ||
203 | *keys = retkeys; | |
204 | ||
205 | return counts; | |
206 | } | |
207 | ||
208 | static int create_map(int map_type, int map_flags, unsigned int size) | |
209 | { | |
210 | int map_fd; | |
211 | ||
212 | map_fd = bpf_create_map(map_type, sizeof(unsigned long long), | |
213 | sizeof(unsigned long long), size, map_flags); | |
214 | ||
215 | if (map_fd == -1) | |
216 | perror("bpf_create_map"); | |
217 | ||
218 | return map_fd; | |
219 | } | |
220 | ||
221 | static int sched_next_online(int pid, int next_to_try) | |
222 | { | |
223 | cpu_set_t cpuset; | |
224 | ||
225 | if (next_to_try == nr_cpus) | |
226 | return -1; | |
227 | ||
228 | while (next_to_try < nr_cpus) { | |
229 | CPU_ZERO(&cpuset); | |
230 | CPU_SET(next_to_try++, &cpuset); | |
231 | if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) | |
232 | break; | |
233 | } | |
234 | ||
235 | return next_to_try; | |
236 | } | |
237 | ||
238 | static void run_parallel(unsigned int tasks, void (*fn)(int i, void *data), | |
239 | void *data) | |
240 | { | |
241 | int next_sched_cpu = 0; | |
242 | pid_t pid[tasks]; | |
243 | int i; | |
244 | ||
245 | for (i = 0; i < tasks; i++) { | |
246 | pid[i] = fork(); | |
247 | if (pid[i] == 0) { | |
248 | next_sched_cpu = sched_next_online(0, next_sched_cpu); | |
249 | fn(i, data); | |
250 | exit(0); | |
251 | } else if (pid[i] == -1) { | |
252 | printf("couldn't spawn #%d process\n", i); | |
253 | exit(1); | |
254 | } | |
255 | /* It is mostly redundant and just allow the parent | |
256 | * process to update next_shced_cpu for the next child | |
257 | * process | |
258 | */ | |
259 | next_sched_cpu = sched_next_online(pid[i], next_sched_cpu); | |
260 | } | |
261 | for (i = 0; i < tasks; i++) { | |
262 | int status; | |
263 | ||
264 | assert(waitpid(pid[i], &status, 0) == pid[i]); | |
265 | assert(status == 0); | |
266 | } | |
267 | } | |
268 | ||
269 | static void do_test_lru_dist(int task, void *data) | |
270 | { | |
271 | unsigned int nr_misses = 0; | |
272 | struct pfect_lru pfect_lru; | |
273 | unsigned long long key, value = 1234; | |
274 | unsigned int i; | |
275 | ||
276 | unsigned int lru_map_fd = ((unsigned int *)data)[0]; | |
277 | unsigned int lru_size = ((unsigned int *)data)[1]; | |
278 | unsigned long long key_offset = task * dist_key_counts; | |
279 | ||
280 | pfect_lru_init(&pfect_lru, lru_size, dist_key_counts); | |
281 | ||
282 | for (i = 0; i < dist_key_counts; i++) { | |
283 | key = dist_keys[i] + key_offset; | |
284 | ||
285 | pfect_lru_lookup_or_insert(&pfect_lru, key); | |
286 | ||
d40fc181 | 287 | if (!bpf_map_lookup_elem(lru_map_fd, &key, &value)) |
5db58faf MKL |
288 | continue; |
289 | ||
d40fc181 JS |
290 | if (bpf_map_update_elem(lru_map_fd, &key, &value, BPF_NOEXIST)) { |
291 | printf("bpf_map_update_elem(lru_map_fd, %llu): errno:%d\n", | |
5db58faf MKL |
292 | key, errno); |
293 | assert(0); | |
294 | } | |
295 | ||
296 | nr_misses++; | |
297 | } | |
298 | ||
299 | printf(" task:%d BPF LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", | |
300 | task, pfect_lru.nr_unique, dist_key_counts, nr_misses, | |
301 | dist_key_counts); | |
302 | printf(" task:%d Perfect LRU: nr_unique:%u(/%u) nr_misses:%u(/%u)\n", | |
303 | task, pfect_lru.nr_unique, pfect_lru.total, | |
304 | pfect_lru.nr_misses, pfect_lru.total); | |
305 | ||
306 | pfect_lru_destroy(&pfect_lru); | |
307 | close(lru_map_fd); | |
308 | } | |
309 | ||
310 | static void test_parallel_lru_dist(int map_type, int map_flags, | |
311 | int nr_tasks, unsigned int lru_size) | |
312 | { | |
313 | int child_data[2]; | |
314 | int lru_map_fd; | |
315 | ||
316 | printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, | |
317 | map_flags); | |
318 | ||
319 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
320 | lru_map_fd = create_map(map_type, map_flags, | |
321 | nr_cpus * lru_size); | |
322 | else | |
323 | lru_map_fd = create_map(map_type, map_flags, | |
324 | nr_tasks * lru_size); | |
325 | assert(lru_map_fd != -1); | |
326 | ||
327 | child_data[0] = lru_map_fd; | |
328 | child_data[1] = lru_size; | |
329 | ||
330 | run_parallel(nr_tasks, do_test_lru_dist, child_data); | |
331 | ||
332 | close(lru_map_fd); | |
333 | } | |
334 | ||
335 | static void test_lru_loss0(int map_type, int map_flags) | |
336 | { | |
337 | unsigned long long key, value[nr_cpus]; | |
338 | unsigned int old_unused_losses = 0; | |
339 | unsigned int new_unused_losses = 0; | |
340 | unsigned int used_losses = 0; | |
341 | int map_fd; | |
342 | ||
343 | printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, | |
344 | map_flags); | |
345 | ||
346 | assert(sched_next_online(0, 0) != -1); | |
347 | ||
348 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
349 | map_fd = create_map(map_type, map_flags, 900 * nr_cpus); | |
350 | else | |
351 | map_fd = create_map(map_type, map_flags, 900); | |
352 | ||
353 | assert(map_fd != -1); | |
354 | ||
355 | value[0] = 1234; | |
356 | ||
357 | for (key = 1; key <= 1000; key++) { | |
358 | int start_key, end_key; | |
359 | ||
d40fc181 | 360 | assert(bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST) == 0); |
5db58faf MKL |
361 | |
362 | start_key = 101; | |
363 | end_key = min(key, 900); | |
364 | ||
365 | while (start_key <= end_key) { | |
d40fc181 | 366 | bpf_map_lookup_elem(map_fd, &start_key, value); |
5db58faf MKL |
367 | start_key++; |
368 | } | |
369 | } | |
370 | ||
371 | for (key = 1; key <= 1000; key++) { | |
d40fc181 | 372 | if (bpf_map_lookup_elem(map_fd, &key, value)) { |
5db58faf MKL |
373 | if (key <= 100) |
374 | old_unused_losses++; | |
375 | else if (key <= 900) | |
376 | used_losses++; | |
377 | else | |
378 | new_unused_losses++; | |
379 | } | |
380 | } | |
381 | ||
382 | close(map_fd); | |
383 | ||
384 | printf("older-elem-losses:%d(/100) active-elem-losses:%d(/800) " | |
385 | "newer-elem-losses:%d(/100)\n", | |
386 | old_unused_losses, used_losses, new_unused_losses); | |
387 | } | |
388 | ||
389 | static void test_lru_loss1(int map_type, int map_flags) | |
390 | { | |
391 | unsigned long long key, value[nr_cpus]; | |
392 | int map_fd; | |
393 | unsigned int nr_losses = 0; | |
394 | ||
395 | printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type, | |
396 | map_flags); | |
397 | ||
398 | assert(sched_next_online(0, 0) != -1); | |
399 | ||
400 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
401 | map_fd = create_map(map_type, map_flags, 1000 * nr_cpus); | |
402 | else | |
403 | map_fd = create_map(map_type, map_flags, 1000); | |
404 | ||
405 | assert(map_fd != -1); | |
406 | ||
407 | value[0] = 1234; | |
408 | ||
409 | for (key = 1; key <= 1000; key++) | |
d40fc181 | 410 | assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST)); |
5db58faf MKL |
411 | |
412 | for (key = 1; key <= 1000; key++) { | |
d40fc181 | 413 | if (bpf_map_lookup_elem(map_fd, &key, value)) |
5db58faf MKL |
414 | nr_losses++; |
415 | } | |
416 | ||
417 | close(map_fd); | |
418 | ||
419 | printf("nr_losses:%d(/1000)\n", nr_losses); | |
420 | } | |
421 | ||
422 | static void do_test_parallel_lru_loss(int task, void *data) | |
423 | { | |
424 | const unsigned int nr_stable_elems = 1000; | |
425 | const unsigned int nr_repeats = 100000; | |
426 | ||
427 | int map_fd = *(int *)data; | |
428 | unsigned long long stable_base; | |
429 | unsigned long long key, value[nr_cpus]; | |
430 | unsigned long long next_ins_key; | |
431 | unsigned int nr_losses = 0; | |
432 | unsigned int i; | |
433 | ||
434 | stable_base = task * nr_repeats * 2 + 1; | |
435 | next_ins_key = stable_base; | |
436 | value[0] = 1234; | |
437 | for (i = 0; i < nr_stable_elems; i++) { | |
d40fc181 | 438 | assert(bpf_map_update_elem(map_fd, &next_ins_key, value, |
5db58faf MKL |
439 | BPF_NOEXIST) == 0); |
440 | next_ins_key++; | |
441 | } | |
442 | ||
443 | for (i = 0; i < nr_repeats; i++) { | |
444 | int rn; | |
445 | ||
446 | rn = rand(); | |
447 | ||
448 | if (rn % 10) { | |
449 | key = rn % nr_stable_elems + stable_base; | |
d40fc181 | 450 | bpf_map_lookup_elem(map_fd, &key, value); |
5db58faf | 451 | } else { |
d40fc181 | 452 | bpf_map_update_elem(map_fd, &next_ins_key, value, |
5db58faf MKL |
453 | BPF_NOEXIST); |
454 | next_ins_key++; | |
455 | } | |
456 | } | |
457 | ||
458 | key = stable_base; | |
459 | for (i = 0; i < nr_stable_elems; i++) { | |
d40fc181 | 460 | if (bpf_map_lookup_elem(map_fd, &key, value)) |
5db58faf MKL |
461 | nr_losses++; |
462 | key++; | |
463 | } | |
464 | ||
465 | printf(" task:%d nr_losses:%u\n", task, nr_losses); | |
466 | } | |
467 | ||
468 | static void test_parallel_lru_loss(int map_type, int map_flags, int nr_tasks) | |
469 | { | |
470 | int map_fd; | |
471 | ||
472 | printf("%s (map_type:%d map_flags:0x%X):\n", __func__, map_type, | |
473 | map_flags); | |
474 | ||
475 | /* Give 20% more than the active working set */ | |
476 | if (map_flags & BPF_F_NO_COMMON_LRU) | |
477 | map_fd = create_map(map_type, map_flags, | |
478 | nr_cpus * (1000 + 200)); | |
479 | else | |
480 | map_fd = create_map(map_type, map_flags, | |
481 | nr_tasks * (1000 + 200)); | |
482 | ||
483 | assert(map_fd != -1); | |
484 | ||
485 | run_parallel(nr_tasks, do_test_parallel_lru_loss, &map_fd); | |
486 | ||
487 | close(map_fd); | |
488 | } | |
489 | ||
490 | int main(int argc, char **argv) | |
491 | { | |
492 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | |
493 | int map_flags[] = {0, BPF_F_NO_COMMON_LRU}; | |
494 | const char *dist_file; | |
495 | int nr_tasks = 1; | |
496 | int lru_size; | |
497 | int f; | |
498 | ||
499 | if (argc < 4) { | |
500 | printf("Usage: %s <dist-file> <lru-size> <nr-tasks>\n", | |
501 | argv[0]); | |
502 | return -1; | |
503 | } | |
504 | ||
505 | dist_file = argv[1]; | |
506 | lru_size = atoi(argv[2]); | |
507 | nr_tasks = atoi(argv[3]); | |
508 | ||
509 | setbuf(stdout, NULL); | |
510 | ||
511 | assert(!setrlimit(RLIMIT_MEMLOCK, &r)); | |
512 | ||
513 | srand(time(NULL)); | |
514 | ||
e00c7b21 | 515 | nr_cpus = bpf_num_possible_cpus(); |
5db58faf MKL |
516 | assert(nr_cpus != -1); |
517 | printf("nr_cpus:%d\n\n", nr_cpus); | |
518 | ||
519 | nr_tasks = min(nr_tasks, nr_cpus); | |
520 | ||
521 | dist_key_counts = read_keys(dist_file, &dist_keys); | |
522 | if (!dist_key_counts) { | |
523 | printf("%s has no key\n", dist_file); | |
524 | return -1; | |
525 | } | |
526 | ||
527 | for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) { | |
528 | test_lru_loss0(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); | |
529 | test_lru_loss1(BPF_MAP_TYPE_LRU_HASH, map_flags[f]); | |
530 | test_parallel_lru_loss(BPF_MAP_TYPE_LRU_HASH, map_flags[f], | |
531 | nr_tasks); | |
532 | test_parallel_lru_dist(BPF_MAP_TYPE_LRU_HASH, map_flags[f], | |
533 | nr_tasks, lru_size); | |
534 | printf("\n"); | |
535 | } | |
536 | ||
537 | free(dist_keys); | |
538 | ||
539 | return 0; | |
540 | } |