Commit | Line | Data |
---|---|---|
231457ec DB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (C) 2018 Davidlohr Bueso. | |
4 | * | |
5 | * Benchmark the various operations allowed for epoll_ctl(2). | |
6 | * The idea is to concurrently stress a single epoll instance | |
7 | */ | |
ba35fe93 | 8 | #ifdef HAVE_EVENTFD_SUPPORT |
231457ec DB |
9 | /* For the CLR_() macros */ |
10 | #include <string.h> | |
11 | #include <pthread.h> | |
12 | ||
13 | #include <errno.h> | |
14 | #include <inttypes.h> | |
15 | #include <signal.h> | |
16 | #include <stdlib.h> | |
91854f9a | 17 | #include <unistd.h> |
231457ec DB |
18 | #include <linux/compiler.h> |
19 | #include <linux/kernel.h> | |
20 | #include <sys/time.h> | |
21 | #include <sys/resource.h> | |
22 | #include <sys/epoll.h> | |
23 | #include <sys/eventfd.h> | |
9c3516d1 | 24 | #include <perf/cpumap.h> |
231457ec DB |
25 | |
26 | #include "../util/stat.h" | |
27 | #include <subcmd/parse-options.h> | |
28 | #include "bench.h" | |
231457ec DB |
29 | |
30 | #include <err.h> | |
31 | ||
/* printf() wrapper that only emits output when __verbose is set. */
#define printinfo(fmt, ...)				\
	do {						\
		if (__verbose)				\
			printf(fmt, ##__VA_ARGS__);	\
	} while (0)
34 | ||
/* Worker thread count (-t); 0 means "use the number of CPUs in the map". */
static unsigned int nthreads;
/* How long the benchmark runs, in seconds (-r). */
static unsigned int nsecs = 8;
/* Raised by toggle_done(); workers leave their loop once they see it. */
static bool done;
/* Enables printinfo() output (-v). */
static bool __verbose;
/* Workers issue random ops on random fds instead of the fixed sweep (-R). */
static bool randomize;
38 | ||
39 | /* | |
40 | * epoll related shared variables. | |
41 | */ | |
42 | ||
43 | /* Maximum number of nesting allowed inside epoll sets */ | |
44 | #define EPOLL_MAXNESTS 4 | |
45 | ||
/*
 * Benchmarked operations. The values index the per-op counters, so
 * EPOLL_NR_OPS has to stay last: it doubles as the array size.
 */
enum {
	OP_EPOLL_ADD = 0,
	OP_EPOLL_MOD,
	OP_EPOLL_DEL,
	EPOLL_NR_OPS,
};
52 | ||
53 | static int epollfd; | |
54 | static int *epollfdp; | |
55 | static bool noaffinity; | |
56 | static unsigned int nested = 0; | |
57 | ||
58 | /* amount of fds to monitor, per thread */ | |
59 | static unsigned int nfds = 64; | |
60 | ||
61 | static pthread_mutex_t thread_lock; | |
62 | static unsigned int threads_starting; | |
63 | static struct stats all_stats[EPOLL_NR_OPS]; | |
64 | static pthread_cond_t thread_parent, thread_worker; | |
65 | ||
/* Per-thread benchmark state; one element per worker thread. */
struct worker {
	/* index of this worker, assigned in do_threads() */
	int tid;
	/* handle filled in by pthread_create() and used by pthread_join() */
	pthread_t thread;
	/* epoll_ctl() calls that returned 0, indexed by OP_EPOLL_* */
	unsigned long ops[EPOLL_NR_OPS];
	/* array of nfds eventfd descriptors created in do_threads() */
	int *fdmap;
};
72 | ||
/*
 * Command line options handed to parse_options() in bench_epoll_ctl().
 * Each entry stores its value directly into the matching file-scope
 * variable.
 */
static const struct option options[] = {
	OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
	OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
	OPT_UINTEGER('f', "nfds", &nfds, "Specify amount of file descriptors to monitor for each thread"),
	OPT_BOOLEAN( 'n', "noaffinity", &noaffinity, "Disables CPU affinity"),
	OPT_UINTEGER( 'N', "nested", &nested, "Nesting level epoll hierarchy (default is 0, no nesting)"),
	OPT_BOOLEAN( 'R', "randomize", &randomize, "Perform random operations on random fds"),
	OPT_BOOLEAN( 'v', "verbose", &__verbose, "Verbose mode"),
	OPT_END()
};
83 | ||
/* NULL-terminated usage strings passed to parse_options()/usage_with_options(). */
static const char * const bench_epoll_ctl_usage[] = {
	"perf bench epoll ctl <options>",
	NULL
};
88 | ||
89 | static void toggle_done(int sig __maybe_unused, | |
90 | siginfo_t *info __maybe_unused, | |
91 | void *uc __maybe_unused) | |
92 | { | |
93 | /* inform all threads that we're done for the day */ | |
94 | done = true; | |
e4d9b04b ACM |
95 | gettimeofday(&bench__end, NULL); |
96 | timersub(&bench__end, &bench__start, &bench__runtime); | |
231457ec DB |
97 | } |
98 | ||
99 | static void nest_epollfd(void) | |
100 | { | |
101 | unsigned int i; | |
102 | struct epoll_event ev; | |
103 | ||
104 | if (nested > EPOLL_MAXNESTS) | |
105 | nested = EPOLL_MAXNESTS; | |
106 | printinfo("Nesting level(s): %d\n", nested); | |
107 | ||
108 | epollfdp = calloc(nested, sizeof(int)); | |
073a15c3 | 109 | if (!epollfdp) |
231457ec DB |
110 | err(EXIT_FAILURE, "calloc"); |
111 | ||
112 | for (i = 0; i < nested; i++) { | |
113 | epollfdp[i] = epoll_create(1); | |
114 | if (epollfd < 0) | |
115 | err(EXIT_FAILURE, "epoll_create"); | |
116 | } | |
117 | ||
118 | ev.events = EPOLLHUP; /* anything */ | |
119 | ev.data.u64 = i; /* any number */ | |
120 | ||
121 | for (i = nested - 1; i; i--) { | |
122 | if (epoll_ctl(epollfdp[i - 1], EPOLL_CTL_ADD, | |
123 | epollfdp[i], &ev) < 0) | |
124 | err(EXIT_FAILURE, "epoll_ctl"); | |
125 | } | |
126 | ||
127 | if (epoll_ctl(epollfd, EPOLL_CTL_ADD, *epollfdp, &ev) < 0) | |
128 | err(EXIT_FAILURE, "epoll_ctl"); | |
129 | } | |
130 | ||
131 | static inline void do_epoll_op(struct worker *w, int op, int fd) | |
132 | { | |
133 | int error; | |
134 | struct epoll_event ev; | |
135 | ||
136 | ev.events = EPOLLIN; | |
137 | ev.data.u64 = fd; | |
138 | ||
139 | switch (op) { | |
140 | case OP_EPOLL_ADD: | |
141 | error = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev); | |
142 | break; | |
143 | case OP_EPOLL_MOD: | |
144 | ev.events = EPOLLOUT; | |
145 | error = epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &ev); | |
146 | break; | |
147 | case OP_EPOLL_DEL: | |
148 | error = epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, NULL); | |
149 | break; | |
150 | default: | |
151 | error = 1; | |
152 | break; | |
153 | } | |
154 | ||
155 | if (!error) | |
156 | w->ops[op]++; | |
157 | } | |
158 | ||
159 | static inline void do_random_epoll_op(struct worker *w) | |
160 | { | |
161 | unsigned long rnd1 = random(), rnd2 = random(); | |
162 | int op, fd; | |
163 | ||
164 | fd = w->fdmap[rnd1 % nfds]; | |
165 | op = rnd2 % EPOLL_NR_OPS; | |
166 | ||
167 | do_epoll_op(w, op, fd); | |
168 | } | |
169 | ||
170 | static void *workerfn(void *arg) | |
171 | { | |
172 | unsigned int i; | |
173 | struct worker *w = (struct worker *) arg; | |
174 | struct timespec ts = { .tv_sec = 0, | |
175 | .tv_nsec = 250 }; | |
176 | ||
177 | pthread_mutex_lock(&thread_lock); | |
178 | threads_starting--; | |
179 | if (!threads_starting) | |
180 | pthread_cond_signal(&thread_parent); | |
181 | pthread_cond_wait(&thread_worker, &thread_lock); | |
182 | pthread_mutex_unlock(&thread_lock); | |
183 | ||
184 | /* Let 'em loose */ | |
185 | do { | |
186 | /* random */ | |
187 | if (randomize) { | |
188 | do_random_epoll_op(w); | |
189 | } else { | |
190 | for (i = 0; i < nfds; i++) { | |
191 | do_epoll_op(w, OP_EPOLL_ADD, w->fdmap[i]); | |
192 | do_epoll_op(w, OP_EPOLL_MOD, w->fdmap[i]); | |
193 | do_epoll_op(w, OP_EPOLL_DEL, w->fdmap[i]); | |
194 | } | |
195 | } | |
196 | ||
197 | nanosleep(&ts, NULL); | |
198 | } while (!done); | |
199 | ||
200 | return NULL; | |
201 | } | |
202 | ||
203 | static void init_fdmaps(struct worker *w, int pct) | |
204 | { | |
205 | unsigned int i; | |
206 | int inc; | |
207 | struct epoll_event ev; | |
208 | ||
209 | if (!pct) | |
210 | return; | |
211 | ||
212 | inc = 100/pct; | |
213 | for (i = 0; i < nfds; i+=inc) { | |
214 | ev.data.fd = w->fdmap[i]; | |
215 | ev.events = EPOLLIN; | |
216 | ||
217 | if (epoll_ctl(epollfd, EPOLL_CTL_ADD, w->fdmap[i], &ev) < 0) | |
218 | err(EXIT_FAILURE, "epoll_ct"); | |
219 | } | |
220 | } | |
221 | ||
f854839b | 222 | static int do_threads(struct worker *worker, struct perf_cpu_map *cpu) |
231457ec DB |
223 | { |
224 | pthread_attr_t thread_attr, *attrp = NULL; | |
299687e1 | 225 | cpu_set_t *cpuset; |
231457ec | 226 | unsigned int i, j; |
11c1ea6f | 227 | int ret = 0; |
299687e1 AR |
228 | int nrcpus; |
229 | size_t size; | |
231457ec DB |
230 | |
231 | if (!noaffinity) | |
232 | pthread_attr_init(&thread_attr); | |
233 | ||
299687e1 AR |
234 | nrcpus = perf_cpu_map__nr(cpu); |
235 | cpuset = CPU_ALLOC(nrcpus); | |
236 | BUG_ON(!cpuset); | |
237 | size = CPU_ALLOC_SIZE(nrcpus); | |
238 | ||
231457ec DB |
239 | for (i = 0; i < nthreads; i++) { |
240 | struct worker *w = &worker[i]; | |
241 | ||
242 | w->tid = i; | |
243 | w->fdmap = calloc(nfds, sizeof(int)); | |
244 | if (!w->fdmap) | |
245 | return 1; | |
246 | ||
247 | for (j = 0; j < nfds; j++) { | |
248 | w->fdmap[j] = eventfd(0, EFD_NONBLOCK); | |
249 | if (w->fdmap[j] < 0) | |
250 | err(EXIT_FAILURE, "eventfd"); | |
251 | } | |
252 | ||
253 | /* | |
254 | * Lets add 50% of the fdmap to the epoll instance, and | |
255 | * do it before any threads are started; otherwise there is | |
256 | * an initial bias of the call failing (mod and del ops). | |
257 | */ | |
258 | if (randomize) | |
259 | init_fdmaps(w, 50); | |
260 | ||
261 | if (!noaffinity) { | |
299687e1 AR |
262 | CPU_ZERO_S(size, cpuset); |
263 | CPU_SET_S(perf_cpu_map__cpu(cpu, i % perf_cpu_map__nr(cpu)).cpu, | |
264 | size, cpuset); | |
231457ec | 265 | |
299687e1 AR |
266 | ret = pthread_attr_setaffinity_np(&thread_attr, size, cpuset); |
267 | if (ret) { | |
268 | CPU_FREE(cpuset); | |
231457ec | 269 | err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); |
299687e1 | 270 | } |
231457ec DB |
271 | |
272 | attrp = &thread_attr; | |
273 | } | |
274 | ||
275 | ret = pthread_create(&w->thread, attrp, workerfn, | |
276 | (void *)(struct worker *) w); | |
299687e1 AR |
277 | if (ret) { |
278 | CPU_FREE(cpuset); | |
231457ec | 279 | err(EXIT_FAILURE, "pthread_create"); |
299687e1 | 280 | } |
231457ec DB |
281 | } |
282 | ||
299687e1 | 283 | CPU_FREE(cpuset); |
231457ec DB |
284 | if (!noaffinity) |
285 | pthread_attr_destroy(&thread_attr); | |
286 | ||
287 | return ret; | |
288 | } | |
289 | ||
290 | static void print_summary(void) | |
291 | { | |
292 | int i; | |
293 | unsigned long avg[EPOLL_NR_OPS]; | |
294 | double stddev[EPOLL_NR_OPS]; | |
295 | ||
296 | for (i = 0; i < EPOLL_NR_OPS; i++) { | |
297 | avg[i] = avg_stats(&all_stats[i]); | |
298 | stddev[i] = stddev_stats(&all_stats[i]); | |
299 | } | |
300 | ||
301 | printf("\nAveraged %ld ADD operations (+- %.2f%%)\n", | |
302 | avg[OP_EPOLL_ADD], rel_stddev_stats(stddev[OP_EPOLL_ADD], | |
303 | avg[OP_EPOLL_ADD])); | |
304 | printf("Averaged %ld MOD operations (+- %.2f%%)\n", | |
305 | avg[OP_EPOLL_MOD], rel_stddev_stats(stddev[OP_EPOLL_MOD], | |
306 | avg[OP_EPOLL_MOD])); | |
307 | printf("Averaged %ld DEL operations (+- %.2f%%)\n", | |
308 | avg[OP_EPOLL_DEL], rel_stddev_stats(stddev[OP_EPOLL_DEL], | |
309 | avg[OP_EPOLL_DEL])); | |
310 | } | |
311 | ||
312 | int bench_epoll_ctl(int argc, const char **argv) | |
313 | { | |
314 | int j, ret = 0; | |
315 | struct sigaction act; | |
316 | struct worker *worker = NULL; | |
f854839b | 317 | struct perf_cpu_map *cpu; |
231457ec DB |
318 | struct rlimit rl, prevrl; |
319 | unsigned int i; | |
320 | ||
321 | argc = parse_options(argc, argv, options, bench_epoll_ctl_usage, 0); | |
322 | if (argc) { | |
323 | usage_with_options(bench_epoll_ctl_usage, options); | |
324 | exit(EXIT_FAILURE); | |
325 | } | |
326 | ||
7b919a53 | 327 | memset(&act, 0, sizeof(act)); |
231457ec DB |
328 | sigfillset(&act.sa_mask); |
329 | act.sa_sigaction = toggle_done; | |
330 | sigaction(SIGINT, &act, NULL); | |
331 | ||
9c3516d1 | 332 | cpu = perf_cpu_map__new(NULL); |
231457ec DB |
333 | if (!cpu) |
334 | goto errmem; | |
335 | ||
336 | /* a single, main epoll instance */ | |
337 | epollfd = epoll_create(1); | |
338 | if (epollfd < 0) | |
339 | err(EXIT_FAILURE, "epoll_create"); | |
340 | ||
341 | /* | |
342 | * Deal with nested epolls, if any. | |
343 | */ | |
344 | if (nested) | |
345 | nest_epollfd(); | |
346 | ||
347 | /* default to the number of CPUs */ | |
348 | if (!nthreads) | |
44028699 | 349 | nthreads = perf_cpu_map__nr(cpu); |
231457ec DB |
350 | |
351 | worker = calloc(nthreads, sizeof(*worker)); | |
352 | if (!worker) | |
353 | goto errmem; | |
354 | ||
355 | if (getrlimit(RLIMIT_NOFILE, &prevrl)) | |
356 | err(EXIT_FAILURE, "getrlimit"); | |
357 | rl.rlim_cur = rl.rlim_max = nfds * nthreads * 2 + 50; | |
358 | printinfo("Setting RLIMIT_NOFILE rlimit from %" PRIu64 " to: %" PRIu64 "\n", | |
359 | (uint64_t)prevrl.rlim_max, (uint64_t)rl.rlim_max); | |
360 | if (setrlimit(RLIMIT_NOFILE, &rl) < 0) | |
361 | err(EXIT_FAILURE, "setrlimit"); | |
362 | ||
363 | printf("Run summary [PID %d]: %d threads doing epoll_ctl ops " | |
364 | "%d file-descriptors for %d secs.\n\n", | |
365 | getpid(), nthreads, nfds, nsecs); | |
366 | ||
367 | for (i = 0; i < EPOLL_NR_OPS; i++) | |
368 | init_stats(&all_stats[i]); | |
369 | ||
370 | pthread_mutex_init(&thread_lock, NULL); | |
371 | pthread_cond_init(&thread_parent, NULL); | |
372 | pthread_cond_init(&thread_worker, NULL); | |
373 | ||
374 | threads_starting = nthreads; | |
375 | ||
e4d9b04b | 376 | gettimeofday(&bench__start, NULL); |
231457ec DB |
377 | |
378 | do_threads(worker, cpu); | |
379 | ||
380 | pthread_mutex_lock(&thread_lock); | |
381 | while (threads_starting) | |
382 | pthread_cond_wait(&thread_parent, &thread_lock); | |
383 | pthread_cond_broadcast(&thread_worker); | |
384 | pthread_mutex_unlock(&thread_lock); | |
385 | ||
386 | sleep(nsecs); | |
387 | toggle_done(0, NULL, NULL); | |
388 | printinfo("main thread: toggling done\n"); | |
389 | ||
390 | for (i = 0; i < nthreads; i++) { | |
391 | ret = pthread_join(worker[i].thread, NULL); | |
392 | if (ret) | |
393 | err(EXIT_FAILURE, "pthread_join"); | |
394 | } | |
395 | ||
396 | /* cleanup & report results */ | |
397 | pthread_cond_destroy(&thread_parent); | |
398 | pthread_cond_destroy(&thread_worker); | |
399 | pthread_mutex_destroy(&thread_lock); | |
400 | ||
401 | for (i = 0; i < nthreads; i++) { | |
402 | unsigned long t[EPOLL_NR_OPS]; | |
403 | ||
404 | for (j = 0; j < EPOLL_NR_OPS; j++) { | |
405 | t[j] = worker[i].ops[j]; | |
406 | update_stats(&all_stats[j], t[j]); | |
407 | } | |
408 | ||
409 | if (nfds == 1) | |
410 | printf("[thread %2d] fdmap: %p [ add: %04ld; mod: %04ld; del: %04lds ops ]\n", | |
411 | worker[i].tid, &worker[i].fdmap[0], | |
412 | t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]); | |
413 | else | |
414 | printf("[thread %2d] fdmap: %p ... %p [ add: %04ld ops; mod: %04ld ops; del: %04ld ops ]\n", | |
415 | worker[i].tid, &worker[i].fdmap[0], | |
416 | &worker[i].fdmap[nfds-1], | |
417 | t[OP_EPOLL_ADD], t[OP_EPOLL_MOD], t[OP_EPOLL_DEL]); | |
418 | } | |
419 | ||
420 | print_summary(); | |
421 | ||
422 | close(epollfd); | |
423 | return ret; | |
424 | errmem: | |
425 | err(EXIT_FAILURE, "calloc"); | |
426 | } | |
ba35fe93 | 427 | #endif // HAVE_EVENTFD_SUPPORT |