Commit | Line | Data |
---|---|---|
d8525fbd JA |
1 | #include <assert.h> |
2 | #include <ctype.h> | |
3 | #include <errno.h> | |
4 | #include <fcntl.h> | |
5 | #include <malloc.h> | |
6 | #include <netdb.h> | |
7 | #include <netinet/in.h> | |
8 | #include <netinet/tcp.h> | |
9 | #include <sched.h> | |
10 | #include <signal.h> | |
11 | #include <stdio.h> | |
12 | #include <stdlib.h> | |
13 | #include <string.h> | |
14 | #include <sys/mman.h> | |
15 | #include <sys/sendfile.h> | |
16 | #include <sys/socket.h> | |
17 | #include <sys/time.h> | |
18 | #include <sys/types.h> | |
19 | #include <sys/wait.h> | |
20 | #include <time.h> | |
21 | #include <unistd.h> | |
22 | ||
23 | #include "splice.h" | |
24 | ||
/* Host the benchmark connects to. */
#define TARGET_HOSTNAME "localhost"

/* Payload pushed per pass, and the size of one I/O chunk (both in bytes). */
#define BYTES (128*1024*1024UL)
#define BUFSIZE (64*1024)

/* Number of BUFSIZE-sized chunks in one pass. */
#define NR (BYTES/BUFSIZE)

/* How many passes each zero-copy method is repeated. */
#define SENDFILE_LOOPS 10
#define SPLICE_LOOPS 10
#define SPLICE_PIPE_LOOPS 10

static int sendfile_loops = SENDFILE_LOOPS;
static int splice_pipe_loops = SPLICE_PIPE_LOOPS;
#if 0
static int splice_loops = SPLICE_LOOPS;
#endif

/* Counter in shared memory, advanced by the low-priority soak process. */
static volatile long long *cycles;
/* Counter increments per second as measured by calibrate_loops(). */
static volatile long long cycles_per_sec;

/* Snapshot taken by start_timing() and consumed by end_timing(). */
static struct timeval start_time;
static double start_cycles;
/* CPU usage (percent) computed by the most recent end_timing(). */
static double cpu_pct;
47 | ||
48 | static void start_timing(const char *desc) | |
49 | { | |
50 | printf("%-20s: ", desc); | |
51 | fflush(stdout); | |
52 | gettimeofday(&start_time, NULL); | |
53 | /* | |
54 | * Give the lowprio cycles thread a chance to run and thus | |
55 | * we get an accurate timestamp: | |
56 | */ | |
57 | sched_yield(); | |
58 | start_cycles = (double)*cycles; | |
59 | } | |
60 | ||
61 | static double end_timing(unsigned long long bytes, double *rate) | |
62 | { | |
63 | static long long total; | |
64 | struct timeval end_time; | |
65 | double usecs; | |
66 | double end_cycles, cpu_cycles; | |
67 | ||
68 | gettimeofday(&end_time, NULL); | |
69 | end_cycles = (double)*cycles; | |
70 | ||
71 | usecs = (double) (end_time.tv_sec - start_time.tv_sec); | |
72 | usecs *= 1000000.0; | |
73 | usecs += (double) (end_time.tv_usec - start_time.tv_usec); | |
74 | total += bytes; | |
75 | ||
76 | cpu_cycles = end_cycles - start_cycles; | |
77 | cpu_pct = 100.0 - | |
78 | cpu_cycles / cycles_per_sec / ( usecs / 1000000.0 ) * 100.0; | |
79 | ||
80 | *rate = (double) bytes / usecs / (1024*1024) * 1000000; | |
81 | ||
82 | printf("%.2fMB/s (%.1fMB total, %.2f%% CPU)\n", *rate, | |
83 | (double) total / (1024*1024), | |
84 | cpu_pct | |
85 | ); | |
86 | ||
87 | return cpu_pct; | |
88 | } | |
89 | ||
90 | static void calibrate_loops(void) | |
91 | { | |
92 | long long l0, l1; | |
93 | int i; | |
94 | ||
95 | cycles_per_sec = 0; | |
96 | printf("calibrating cycles: "); fflush(stdout); | |
97 | ||
98 | /* | |
99 | * Make sure we start on a precise timer IRQ boundary: | |
100 | */ | |
101 | usleep(50000); | |
102 | ||
103 | for (i = 0; i < 10; i++) { | |
104 | sched_yield(); | |
105 | l0 = *cycles; | |
106 | usleep(200000); | |
107 | l1 = *cycles; | |
108 | cycles_per_sec = max(cycles_per_sec, l1-l0); | |
109 | } | |
110 | cycles_per_sec *= 5; | |
111 | ||
112 | printf("%Ld cycles/sec\n", cycles_per_sec); | |
113 | } | |
114 | ||
115 | static int child(struct sockaddr *addr, int len) | |
116 | { | |
117 | static char buffer[BUFSIZE]; | |
118 | int sk; | |
119 | double c1, c2, c3; | |
120 | int fd; | |
121 | struct sockaddr_in s_to; | |
122 | struct hostent *hp; | |
123 | double r1, r2, r3, r4, r5; | |
124 | int i, pipefd[2]; | |
125 | loff_t off = 0; | |
126 | ||
127 | r1 = r2 = r3 = r4 = r5 = 0; | |
128 | ||
129 | sk = socket(PF_INET, SOCK_STREAM, 0); | |
130 | if (!sk) | |
131 | return error("socket"); | |
132 | hp = gethostbyname (TARGET_HOSTNAME); | |
133 | BUG_ON(!hp); | |
134 | bzero ((char *) &s_to, sizeof (s_to)); | |
135 | bcopy ((char *) hp->h_addr, (char *) &(s_to.sin_addr), hp->h_length); | |
136 | s_to.sin_family = hp->h_addrtype; | |
137 | s_to.sin_port = htons(1111); | |
138 | ||
139 | calibrate_loops(); | |
140 | ||
141 | fprintf(stdout, "BUFSIZE = %d\n", BUFSIZE); | |
142 | fflush(stdout); | |
143 | ||
422d15ba JA |
144 | if (connect(sk, (struct sockaddr *)&s_to, len) < 0) |
145 | return error("connect"); | |
d8525fbd | 146 | |
422d15ba JA |
147 | start_timing("Empty buffer"); |
148 | for (i = 0; i < NR; i++) | |
149 | write(sk, buffer, BUFSIZE); | |
150 | end_timing(NR*BUFSIZE, &r1); | |
d8525fbd | 151 | |
422d15ba JA |
152 | fd = open("largefile", O_RDONLY); |
153 | if (fd < 0) | |
154 | return error("largefile"); | |
d8525fbd | 155 | |
422d15ba JA |
156 | start_timing("Read/write loop"); |
157 | for (i = 0; i < NR; i++) { | |
158 | if (read(fd, buffer, BUFSIZE) != BUFSIZE) | |
159 | return error("largefile read"); | |
160 | write(sk, buffer, BUFSIZE); | |
d8525fbd | 161 | } |
422d15ba JA |
162 | end_timing(NR*BUFSIZE, &r2); |
163 | close(fd); | |
164 | close(sk); | |
d8525fbd | 165 | |
422d15ba | 166 | start_timing("sendfile"); |
d8525fbd | 167 | sendfile_again: |
422d15ba JA |
168 | sk = socket(PF_INET, SOCK_STREAM, 0); |
169 | if (connect(sk, (struct sockaddr *)&s_to, len) < 0) | |
170 | return error("connect"); | |
171 | ||
172 | fd = open("largefile", O_RDONLY); | |
173 | if (fd < 0) | |
174 | return error("largefile"); | |
175 | ||
176 | i = NR*BUFSIZE; | |
177 | do { | |
178 | int ret = sendfile(sk, fd, NULL, i); | |
179 | i -= ret; | |
180 | } while (i); | |
181 | ||
182 | close(fd); | |
183 | close(sk); | |
184 | if (--sendfile_loops) | |
185 | goto sendfile_again; | |
186 | c1 = end_timing(NR*BUFSIZE*SENDFILE_LOOPS, &r3); | |
187 | ||
188 | start_timing("splice-pipe"); | |
d8525fbd | 189 | splice_pipe_again: |
422d15ba JA |
190 | sk = socket(PF_INET, SOCK_STREAM, 0); |
191 | if (connect(sk, (struct sockaddr *)&s_to, len) < 0) | |
192 | return error("connect"); | |
193 | ||
194 | fd = open("largefile", O_RDONLY); | |
195 | if (fd < 0) | |
196 | return error("largefile"); | |
197 | if (pipe(pipefd) < 0) | |
198 | return error("pipe"); | |
199 | ||
200 | i = NR*BUFSIZE; | |
201 | off = 0; | |
202 | do { | |
203 | int ret = splice(fd, &off, pipefd[1], NULL, min(i, BUFSIZE), SPLICE_F_NONBLOCK); | |
204 | if (ret <= 0) | |
205 | return error("splice-pipe-in"); | |
206 | i -= ret; | |
207 | while (ret > 0) { | |
208 | int flags = i ? SPLICE_F_MORE : 0; | |
209 | int written = splice(pipefd[0], NULL, sk, NULL, ret, flags); | |
210 | if (written <= 0) | |
211 | return error("splice-pipe-out"); | |
212 | ret -= written; | |
213 | } | |
214 | } while (i); | |
215 | ||
216 | close(fd); | |
217 | close(sk); | |
218 | close(pipefd[0]); | |
219 | close(pipefd[1]); | |
220 | if (--splice_pipe_loops) | |
221 | goto splice_pipe_again; | |
222 | c2 = end_timing(NR*BUFSIZE*SPLICE_LOOPS, &r4); | |
223 | ||
224 | /* | |
225 | * Direct splicing was disabled as being immediately available, | |
226 | * it's reserved for sendfile emulation now. | |
227 | */ | |
228 | #if 0 | |
229 | start_timing("splice"); | |
d8525fbd | 230 | splice_again: |
422d15ba JA |
231 | sk = socket(PF_INET, SOCK_STREAM, 0); |
232 | if (connect(sk, (struct sockaddr *)&s_to, len) < 0) | |
233 | return error("connect"); | |
234 | ||
235 | fd = open("largefile", O_RDONLY); | |
236 | if (fd < 0) | |
237 | return error("largefile"); | |
238 | ||
239 | i = NR*BUFSIZE; | |
240 | off = 0; | |
241 | do { | |
242 | int flags = BUFSIZE < i ? SPLICE_F_MORE : 0; | |
243 | int ret; | |
244 | ||
245 | ret = splice(fd, &off, sk, NULL, min(i, BUFSIZE), flags); | |
246 | ||
247 | if (ret <= 0) | |
248 | return error("splice"); | |
249 | i -= ret; | |
250 | } while (i); | |
251 | ||
252 | close(fd); | |
253 | close(sk); | |
254 | if (--splice_loops) | |
255 | goto splice_again; | |
256 | c3 = end_timing(NR*BUFSIZE*SPLICE_LOOPS, &r5); | |
257 | #else | |
258 | c3 = 0; | |
259 | #endif | |
d8525fbd JA |
260 | |
261 | /* | |
262 | * c1/r3 - sendfile | |
263 | * c2/r4 - splice-pipe | |
264 | * c3/r5 - splice | |
265 | */ | |
266 | ||
267 | if (c1 && c2) | |
268 | printf("sendfile is %.2f%% more efficient than splice-pipe.\n", | |
269 | (c2 - c1) / c1 * 100.0 ); | |
270 | if (c1 && c3) | |
271 | printf("sendfile is %.2f%% more efficient than splice.\n", | |
272 | (c3 - c1) / c1 * 100.0 ); | |
273 | if (c2 && c3) | |
274 | printf("splice is %.2f%% more efficient splice-pipe.\n", | |
275 | (c2 - c3) / c3 * 100.0 ); | |
276 | if (r3 && r4) | |
277 | printf("sendfile is %.2f%% faster than splice-pipe.\n", | |
278 | (r3 - r4) / r4 * 100.0 ); | |
279 | if (r3 && r5) | |
280 | printf("sendfile is %.2f%% faster than splice.\n", | |
281 | (r3 - r5) / r5 * 100.0 ); | |
282 | if (r4 && r5) | |
283 | printf("splice is %.2f%% faster than splice-pipe.\n", | |
284 | (r5 - r4) / r4 * 100.0 ); | |
285 | ||
286 | return 0; | |
287 | } | |
288 | ||
289 | ||
290 | static void setup_shared_var(void) | |
291 | { | |
292 | char zerobuff [4096] = { 0, }; | |
293 | int ret, fd; | |
294 | ||
295 | fd = creat(".tmp_mmap", 0700); | |
296 | BUG_ON(fd == -1); | |
297 | close(fd); | |
298 | ||
299 | fd = open(".tmp_mmap", O_RDWR|O_CREAT|O_TRUNC); | |
300 | BUG_ON(fd == -1); | |
301 | ret = write(fd, zerobuff, 4096); | |
302 | BUG_ON(ret != 4096); | |
303 | ||
304 | cycles = (void *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); | |
305 | BUG_ON(cycles == (void *)-1); | |
306 | ||
307 | close(fd); | |
308 | } | |
309 | ||
/* Scheduling policy number, for when <sched.h> does not expose it. */
#ifndef SCHED_BATCH
#define SCHED_BATCH 3
#endif

/*
 * rdtscll(val): store the CPU timestamp counter in the 64-bit lvalue val.
 *
 * Implemented with RDTSC on 32-bit and 64-bit x86.  On every other
 * architecture val is set to 0, so code that accumulates the difference of
 * two readings never advances.
 */
#if defined(__i386__)
#define rdtscll(val)							\
do {									\
	__asm__ __volatile__("rdtsc" : "=A" (val));			\
} while (0)
#elif defined(__x86_64__)
/* "=A" does not mean edx:eax on x86_64, so read both halves explicitly. */
#define rdtscll(val)							\
do {									\
	unsigned int tsc_lo_, tsc_hi_;					\
	__asm__ __volatile__("rdtsc" : "=a" (tsc_lo_), "=d" (tsc_hi_));	\
	(val) = ((unsigned long long) tsc_hi_ << 32) | tsc_lo_;		\
} while (0)
#else
#define rdtscll(val)							\
do { (val) = 0LL; } while (0)
#endif
328 | ||
329 | /* | |
330 | * Keep lowprio looping - to meausure the number of idle cycles | |
331 | * available. It's tricky: we do a series of RDTSC calls, and | |
332 | * if the delay to the last measurement was less than 500 cycles, | |
333 | * we conclude that only this loop ran. | |
334 | */ | |
335 | static void lowprio_cycle_soak_loop(void) | |
336 | { | |
337 | struct sched_param p = { sched_priority: 0 }; | |
338 | unsigned long long t0, t1, delta; | |
339 | ||
340 | /* | |
341 | * We are a nice +19 SCHED_BATCH task: | |
342 | */ | |
343 | BUG_ON(sched_setscheduler(0, SCHED_BATCH, &p) != 0); | |
344 | nice(40); | |
345 | ||
346 | rdtscll(t0); | |
347 | while (cycles >= 0) { | |
348 | rdtscll(t1); | |
349 | delta = t1-t0; | |
350 | if (delta < 500) | |
351 | *cycles += delta; | |
352 | t0 = t1; | |
353 | } | |
354 | } | |
355 | ||
/*
 * Entry point: set up the shared cycle counter, fork the low-priority
 * soak process, run the benchmarks in child() at raised priority, then
 * stop the soak process.
 *
 * Exits with status 0 when child() returns 0 and 1 otherwise (including
 * any setup failure).
 */
int main(int argc, char **argv)
{
	struct sockaddr addr;
	socklen_t len;
	pid_t pid;
	int sk, ret;

	(void) argc;
	(void) argv;

	setup_shared_var();

	signal(SIGCHLD, SIG_IGN);
	/* Must be signed: socket() reports failure as -1. */
	sk = socket(PF_INET, SOCK_STREAM, 0);
	if (sk < 0) {
		perror("socket");
		exit(1);
	}
	if (listen(sk, 1) < 0) {
		perror("listen");
		exit(1);
	}
	len = sizeof(addr);
	if (getsockname(sk, &addr, &len) < 0) {
		perror("getsockname");
		exit(1);
	}
	pid = fork();
	if (pid < 0) {
		/* Bail out: kill(-1, ...) below would signal every process. */
		perror("fork");
		exit(1);
	}
	if (!pid) {
		lowprio_cycle_soak_loop();
		exit(0);
	}
	/* Best effort; raising priority fails without privileges. */
	nice(-20);
	ret = child(&addr, len);
	kill(pid, SIGHUP);
	exit(ret ? 1 : 0);
}