Commit | Line | Data |
---|---|---|
d8525fbd JA |
1 | #include <assert.h> |
2 | #include <ctype.h> | |
3 | #include <errno.h> | |
4 | #include <fcntl.h> | |
5 | #include <malloc.h> | |
6 | #include <netdb.h> | |
7 | #include <netinet/in.h> | |
8 | #include <netinet/tcp.h> | |
9 | #include <sched.h> | |
10 | #include <signal.h> | |
11 | #include <stdio.h> | |
12 | #include <stdlib.h> | |
13 | #include <string.h> | |
14 | #include <sys/mman.h> | |
15 | #include <sys/sendfile.h> | |
16 | #include <sys/socket.h> | |
17 | #include <sys/time.h> | |
18 | #include <sys/types.h> | |
19 | #include <sys/wait.h> | |
20 | #include <time.h> | |
21 | #include <unistd.h> | |
22 | ||
23 | #include "splice.h" | |
24 | ||
/* Host name the benchmark client connects to. */
#define TARGET_HOSTNAME "localhost"

/* Bytes moved by one benchmark pass, and the size of a single transfer. */
#define BYTES (128*1024*1024UL)
#define BUFSIZE (64*1024U)

/* Number of BUFSIZE-sized transfers that add up to BYTES. */
#define NR (BYTES/BUFSIZE)

/* Repeat counts for the individual transfer methods. */
#define SENDFILE_LOOPS 10
#define SPLICE_LOOPS 10
#define SPLICE_PIPE_LOOPS 10

/* Countdown counters decremented by the repeat loops in child(). */
static int sendfile_loops = SENDFILE_LOOPS;
static int splice_pipe_loops = SPLICE_PIPE_LOOPS;
#if 0
static int splice_loops = SPLICE_LOOPS;
#endif

/*
 * cycles points at the idle-cycle counter inside the shared mapping created
 * by setup_shared_var() and advanced by lowprio_cycle_soak_loop().
 * cycles_per_sec is the calibration result of calibrate_loops().
 */
static volatile long long *cycles;
static volatile long long cycles_per_sec;

/* State carried from start_timing() to end_timing(). */
static struct timeval start_time;
static double start_cycles;
static double cpu_pct;
47 | ||
48 | static void start_timing(const char *desc) | |
49 | { | |
50 | printf("%-20s: ", desc); | |
51 | fflush(stdout); | |
52 | gettimeofday(&start_time, NULL); | |
53 | /* | |
54 | * Give the lowprio cycles thread a chance to run and thus | |
55 | * we get an accurate timestamp: | |
56 | */ | |
57 | sched_yield(); | |
58 | start_cycles = (double)*cycles; | |
59 | } | |
60 | ||
61 | static double end_timing(unsigned long long bytes, double *rate) | |
62 | { | |
63 | static long long total; | |
64 | struct timeval end_time; | |
65 | double usecs; | |
66 | double end_cycles, cpu_cycles; | |
67 | ||
68 | gettimeofday(&end_time, NULL); | |
69 | end_cycles = (double)*cycles; | |
70 | ||
71 | usecs = (double) (end_time.tv_sec - start_time.tv_sec); | |
72 | usecs *= 1000000.0; | |
73 | usecs += (double) (end_time.tv_usec - start_time.tv_usec); | |
74 | total += bytes; | |
75 | ||
76 | cpu_cycles = end_cycles - start_cycles; | |
77 | cpu_pct = 100.0 - | |
78 | cpu_cycles / cycles_per_sec / ( usecs / 1000000.0 ) * 100.0; | |
79 | ||
80 | *rate = (double) bytes / usecs / (1024*1024) * 1000000; | |
81 | ||
82 | printf("%.2fMB/s (%.1fMB total, %.2f%% CPU)\n", *rate, | |
83 | (double) total / (1024*1024), | |
84 | cpu_pct | |
85 | ); | |
86 | ||
87 | return cpu_pct; | |
88 | } | |
89 | ||
90 | static void calibrate_loops(void) | |
91 | { | |
92 | long long l0, l1; | |
93 | int i; | |
94 | ||
95 | cycles_per_sec = 0; | |
96 | printf("calibrating cycles: "); fflush(stdout); | |
97 | ||
98 | /* | |
99 | * Make sure we start on a precise timer IRQ boundary: | |
100 | */ | |
101 | usleep(50000); | |
102 | ||
103 | for (i = 0; i < 10; i++) { | |
104 | sched_yield(); | |
105 | l0 = *cycles; | |
106 | usleep(200000); | |
107 | l1 = *cycles; | |
108 | cycles_per_sec = max(cycles_per_sec, l1-l0); | |
109 | } | |
110 | cycles_per_sec *= 5; | |
111 | ||
112 | printf("%Ld cycles/sec\n", cycles_per_sec); | |
113 | } | |
114 | ||
000b72a8 | 115 | static int child(void) |
d8525fbd JA |
116 | { |
117 | static char buffer[BUFSIZE]; | |
118 | int sk; | |
119 | double c1, c2, c3; | |
120 | int fd; | |
121 | struct sockaddr_in s_to; | |
122 | struct hostent *hp; | |
123 | double r1, r2, r3, r4, r5; | |
000b72a8 JA |
124 | unsigned int i; |
125 | int pipefd[2]; | |
d8525fbd JA |
126 | loff_t off = 0; |
127 | ||
128 | r1 = r2 = r3 = r4 = r5 = 0; | |
129 | ||
130 | sk = socket(PF_INET, SOCK_STREAM, 0); | |
131 | if (!sk) | |
132 | return error("socket"); | |
133 | hp = gethostbyname (TARGET_HOSTNAME); | |
134 | BUG_ON(!hp); | |
135 | bzero ((char *) &s_to, sizeof (s_to)); | |
136 | bcopy ((char *) hp->h_addr, (char *) &(s_to.sin_addr), hp->h_length); | |
137 | s_to.sin_family = hp->h_addrtype; | |
138 | s_to.sin_port = htons(1111); | |
139 | ||
140 | calibrate_loops(); | |
141 | ||
142 | fprintf(stdout, "BUFSIZE = %d\n", BUFSIZE); | |
143 | fflush(stdout); | |
144 | ||
000b72a8 | 145 | if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0) |
422d15ba | 146 | return error("connect"); |
d8525fbd | 147 | |
422d15ba | 148 | start_timing("Empty buffer"); |
fb02ff37 JA |
149 | for (i = 0; i < NR; i++) { |
150 | if (write(sk, buffer, BUFSIZE) != BUFSIZE) | |
151 | return error("empty buffer write"); | |
152 | } | |
422d15ba | 153 | end_timing(NR*BUFSIZE, &r1); |
d8525fbd | 154 | |
422d15ba JA |
155 | fd = open("largefile", O_RDONLY); |
156 | if (fd < 0) | |
157 | return error("largefile"); | |
d8525fbd | 158 | |
422d15ba JA |
159 | start_timing("Read/write loop"); |
160 | for (i = 0; i < NR; i++) { | |
161 | if (read(fd, buffer, BUFSIZE) != BUFSIZE) | |
162 | return error("largefile read"); | |
fb02ff37 JA |
163 | if (write(sk, buffer, BUFSIZE) != BUFSIZE) |
164 | return error("largefile write"); | |
d8525fbd | 165 | } |
422d15ba JA |
166 | end_timing(NR*BUFSIZE, &r2); |
167 | close(fd); | |
168 | close(sk); | |
d8525fbd | 169 | |
422d15ba | 170 | start_timing("sendfile"); |
d8525fbd | 171 | sendfile_again: |
422d15ba | 172 | sk = socket(PF_INET, SOCK_STREAM, 0); |
000b72a8 | 173 | if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0) |
422d15ba JA |
174 | return error("connect"); |
175 | ||
176 | fd = open("largefile", O_RDONLY); | |
177 | if (fd < 0) | |
178 | return error("largefile"); | |
179 | ||
180 | i = NR*BUFSIZE; | |
181 | do { | |
182 | int ret = sendfile(sk, fd, NULL, i); | |
183 | i -= ret; | |
184 | } while (i); | |
185 | ||
186 | close(fd); | |
187 | close(sk); | |
188 | if (--sendfile_loops) | |
189 | goto sendfile_again; | |
190 | c1 = end_timing(NR*BUFSIZE*SENDFILE_LOOPS, &r3); | |
191 | ||
192 | start_timing("splice-pipe"); | |
d8525fbd | 193 | splice_pipe_again: |
422d15ba | 194 | sk = socket(PF_INET, SOCK_STREAM, 0); |
000b72a8 | 195 | if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0) |
422d15ba JA |
196 | return error("connect"); |
197 | ||
198 | fd = open("largefile", O_RDONLY); | |
199 | if (fd < 0) | |
200 | return error("largefile"); | |
201 | if (pipe(pipefd) < 0) | |
202 | return error("pipe"); | |
203 | ||
204 | i = NR*BUFSIZE; | |
205 | off = 0; | |
206 | do { | |
13b72067 | 207 | int ret = ssplice(fd, &off, pipefd[1], NULL, min(i, BUFSIZE), SPLICE_F_NONBLOCK); |
422d15ba JA |
208 | if (ret <= 0) |
209 | return error("splice-pipe-in"); | |
210 | i -= ret; | |
211 | while (ret > 0) { | |
212 | int flags = i ? SPLICE_F_MORE : 0; | |
13b72067 | 213 | int written = ssplice(pipefd[0], NULL, sk, NULL, ret, flags); |
422d15ba JA |
214 | if (written <= 0) |
215 | return error("splice-pipe-out"); | |
216 | ret -= written; | |
217 | } | |
218 | } while (i); | |
219 | ||
220 | close(fd); | |
221 | close(sk); | |
222 | close(pipefd[0]); | |
223 | close(pipefd[1]); | |
224 | if (--splice_pipe_loops) | |
225 | goto splice_pipe_again; | |
226 | c2 = end_timing(NR*BUFSIZE*SPLICE_LOOPS, &r4); | |
227 | ||
228 | /* | |
229 | * Direct splicing was disabled as being immediately available, | |
230 | * it's reserved for sendfile emulation now. | |
231 | */ | |
232 | #if 0 | |
233 | start_timing("splice"); | |
d8525fbd | 234 | splice_again: |
422d15ba | 235 | sk = socket(PF_INET, SOCK_STREAM, 0); |
000b72a8 | 236 | if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0) |
422d15ba JA |
237 | return error("connect"); |
238 | ||
239 | fd = open("largefile", O_RDONLY); | |
240 | if (fd < 0) | |
241 | return error("largefile"); | |
242 | ||
243 | i = NR*BUFSIZE; | |
244 | off = 0; | |
245 | do { | |
246 | int flags = BUFSIZE < i ? SPLICE_F_MORE : 0; | |
247 | int ret; | |
248 | ||
13b72067 | 249 | ret = ssplice(fd, &off, sk, NULL, min(i, BUFSIZE), flags); |
422d15ba JA |
250 | |
251 | if (ret <= 0) | |
252 | return error("splice"); | |
253 | i -= ret; | |
254 | } while (i); | |
255 | ||
256 | close(fd); | |
257 | close(sk); | |
258 | if (--splice_loops) | |
259 | goto splice_again; | |
260 | c3 = end_timing(NR*BUFSIZE*SPLICE_LOOPS, &r5); | |
261 | #else | |
262 | c3 = 0; | |
263 | #endif | |
d8525fbd JA |
264 | |
265 | /* | |
266 | * c1/r3 - sendfile | |
267 | * c2/r4 - splice-pipe | |
268 | * c3/r5 - splice | |
269 | */ | |
270 | ||
271 | if (c1 && c2) | |
272 | printf("sendfile is %.2f%% more efficient than splice-pipe.\n", | |
273 | (c2 - c1) / c1 * 100.0 ); | |
274 | if (c1 && c3) | |
275 | printf("sendfile is %.2f%% more efficient than splice.\n", | |
276 | (c3 - c1) / c1 * 100.0 ); | |
277 | if (c2 && c3) | |
278 | printf("splice is %.2f%% more efficient splice-pipe.\n", | |
279 | (c2 - c3) / c3 * 100.0 ); | |
280 | if (r3 && r4) | |
281 | printf("sendfile is %.2f%% faster than splice-pipe.\n", | |
282 | (r3 - r4) / r4 * 100.0 ); | |
283 | if (r3 && r5) | |
284 | printf("sendfile is %.2f%% faster than splice.\n", | |
285 | (r3 - r5) / r5 * 100.0 ); | |
286 | if (r4 && r5) | |
287 | printf("splice is %.2f%% faster than splice-pipe.\n", | |
288 | (r5 - r4) / r4 * 100.0 ); | |
289 | ||
290 | return 0; | |
291 | } | |
292 | ||
293 | ||
294 | static void setup_shared_var(void) | |
295 | { | |
296 | char zerobuff [4096] = { 0, }; | |
297 | int ret, fd; | |
298 | ||
299 | fd = creat(".tmp_mmap", 0700); | |
300 | BUG_ON(fd == -1); | |
301 | close(fd); | |
302 | ||
fb02ff37 | 303 | fd = open(".tmp_mmap", O_RDWR|O_CREAT|O_TRUNC, 0644); |
d8525fbd JA |
304 | BUG_ON(fd == -1); |
305 | ret = write(fd, zerobuff, 4096); | |
306 | BUG_ON(ret != 4096); | |
307 | ||
308 | cycles = (void *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); | |
309 | BUG_ON(cycles == (void *)-1); | |
310 | ||
311 | close(fd); | |
312 | } | |
313 | ||
/* Scheduling policy number used by the soak loop if <sched.h> lacks it. */
#ifndef SCHED_BATCH
#define SCHED_BATCH 3
#endif

/*
 * rdtscll(val): store the current CPU timestamp counter in the lvalue val.
 *
 * Implemented with the RDTSC instruction on i386 and x86_64. Every other
 * architecture gets a fallback that stores 0, so the soak loop never
 * accumulates cycles there (the former ia64 variant was already compiled
 * out).
 */
#if defined(__i386__)
#define rdtscll(val) \
do { \
	__asm__ __volatile__("rdtsc" : "=A" (val)); \
} while (0)
#elif defined(__x86_64__)
#define rdtscll(val) \
do { \
	unsigned long long rdtscll_lo_, rdtscll_hi_; \
	__asm__ __volatile__("rdtsc" : "=a" (rdtscll_lo_), "=d" (rdtscll_hi_)); \
	(val) = (rdtscll_hi_ << 32) | rdtscll_lo_; \
} while (0)
#else
#define rdtscll(val) \
	do { (val) = 0LL; } while (0)
#endif
339 | ||
340 | /* | |
341 | * Keep lowprio looping - to meausure the number of idle cycles | |
342 | * available. It's tricky: we do a series of RDTSC calls, and | |
343 | * if the delay to the last measurement was less than 500 cycles, | |
344 | * we conclude that only this loop ran. | |
345 | */ | |
346 | static void lowprio_cycle_soak_loop(void) | |
347 | { | |
348 | struct sched_param p = { sched_priority: 0 }; | |
349 | unsigned long long t0, t1, delta; | |
350 | ||
351 | /* | |
352 | * We are a nice +19 SCHED_BATCH task: | |
353 | */ | |
354 | BUG_ON(sched_setscheduler(0, SCHED_BATCH, &p) != 0); | |
fb02ff37 JA |
355 | if (nice(40) < 0) |
356 | perror("nice"); | |
d8525fbd JA |
357 | |
358 | rdtscll(t0); | |
359 | while (cycles >= 0) { | |
360 | rdtscll(t1); | |
361 | delta = t1-t0; | |
362 | if (delta < 500) | |
363 | *cycles += delta; | |
364 | t0 = t1; | |
365 | } | |
366 | } | |
367 | ||
/*
 * Entry point: set up the shared counter page, fork the low-priority
 * cycle-soak process, raise our own priority and run the benchmark.
 * The soak process is sent SIGHUP once the benchmark is done.
 *
 * Exits with status 0 when child() succeeds, 1 when it fails or when the
 * soak process could not be forked.
 */
int main(__attribute__((__unused__)) int argc, __attribute__((__unused__)) char **argv)
{
	pid_t pid;
	int ret;

	setup_shared_var();

	/* Ignoring SIGCHLD means the soak process never has to be waited for. */
	signal(SIGCHLD, SIG_IGN);

	pid = fork();
	if (pid < 0) {
		/* Bail out here: kill(-1, ...) below would hit every process. */
		perror("fork");
		exit(1);
	}
	if (!pid) {
		lowprio_cycle_soak_loop();
		exit(0);
	}

	/*
	 * nice() returns the new nice value, which is negative on success
	 * here; only -1 together with a set errno is a failure.
	 */
	errno = 0;
	if (nice(-20) == -1 && errno != 0)
		perror("nice");

	ret = child();
	kill(pid, SIGHUP);
	exit(ret ? 1 : 0);
}