7 #include <netinet/in.h>
8 #include <netinet/tcp.h>
15 #include <sys/sendfile.h>
16 #include <sys/socket.h>
18 #include <sys/types.h>
/*
 * Benchmark configuration and shared timing state.
 * NOTE(review): every line of this listing carries its original line number
 * as a prefix and some original lines are absent - confirm against the
 * complete file before changing any code.
 */
/* Host name that child() resolves with gethostbyname() and connects to. */
25 #define TARGET_HOSTNAME "localhost"
/*
 * BYTES is 128 MiB and BUFSIZE is 64 KiB; NR is the number of BUFSIZE
 * chunks that fit in BYTES.
 */
27 #define BYTES (128*1024*1024UL)
28 #define BUFSIZE (64*1024U)
30 #define NR (BYTES/BUFSIZE)
/* Repeat counts for the sendfile, splice and splice-pipe passes. */
32 #define SENDFILE_LOOPS 10
33 #define SPLICE_LOOPS 10
34 #define SPLICE_PIPE_LOOPS 10
/*
 * Countdown copies of the repeat counts. child() tests sendfile_loops and
 * splice_pipe_loops after decrementing them.
 */
36 static int sendfile_loops = SENDFILE_LOOPS;
37 static int splice_pipe_loops = SPLICE_PIPE_LOOPS;
39 static int splice_loops = SPLICE_LOOPS;
/*
 * cycles points at the counter page mapped by setup_shared_var();
 * cycles_per_sec is assigned in calibrate_loops().
 */
42 static volatile long long *cycles, cycles_per_sec;
/* Wall-clock time and counter value sampled by start_timing(). */
44 static struct timeval start_time;
45 static double start_cycles;
/*
 * NOTE(review): presumably receives the percentage computed in
 * end_timing(); the assignment itself is not visible in this listing.
 */
46 static double cpu_pct;
/*
 * Begin a timed measurement.
 *
 * desc: label printed left-aligned in a 20-character column, followed by
 *       ": " and no newline.
 *
 * Records the wall-clock start in start_time and the current value of the
 * shared counter *cycles in start_cycles.
 * NOTE(review): the statement between the two samples (the one the comment
 * fragment below describes) is not visible in this listing.
 */
48 static void start_timing(const char *desc)
50 printf("%-20s: ", desc);
52 gettimeofday(&start_time, NULL);
54 * Give the lowprio cycles thread a chance to run and thus
55 * we get an accurate timestamp:
58 start_cycles = (double)*cycles;
/*
 * Finish a measurement begun by start_timing() and print a summary line.
 *
 * bytes: number of bytes transferred during the measurement.
 * rate:  output; receives bytes / usecs / (1024*1024) * 1000000, which the
 *        summary line prints with the unit MB/s.
 *
 * Elapsed time comes from gettimeofday() relative to start_time; cpu_cycles
 * is the change in the shared counter *cycles since start_timing().
 * NOTE(review): the declaration of usecs, any scaling of the seconds
 * difference to microseconds, the assignment target of the percentage
 * expression, the update of total and the return statement are not visible
 * in this listing - confirm them against the complete file. child() stores
 * the return value in c1, c2 and c3.
 */
61 static double end_timing(unsigned long long bytes, double *rate)
63 static long long total;
64 struct timeval end_time;
66 double end_cycles, cpu_cycles;
68 gettimeofday(&end_time, NULL);
69 end_cycles = (double)*cycles;
71 usecs = (double) (end_time.tv_sec - start_time.tv_sec);
73 usecs += (double) (end_time.tv_usec - start_time.tv_usec);
76 cpu_cycles = end_cycles - start_cycles;
78 cpu_cycles / cycles_per_sec / ( usecs / 1000000.0 ) * 100.0;
80 *rate = (double) bytes / usecs / (1024*1024) * 1000000;
82 printf("%.2fMB/s (%.1fMB total, %.2f%% CPU)\n", *rate,
83 (double) total / (1024*1024),
/*
 * Calibrate cycles_per_sec and print it.
 *
 * Over 10 iterations the largest observed l1-l0 difference is kept in
 * cycles_per_sec.
 * NOTE(review): how l0 and l1 are sampled is not visible in this listing.
 * NOTE(review): "%Ld" is a non-standard conversion for long long; "%lld" is
 * the standard spelling - confirm before changing.
 */
90 static void calibrate_loops(void)
96 printf("calibrating cycles: "); fflush(stdout);
99 * Make sure we start on a precise timer IRQ boundary:
103 for (i = 0; i < 10; i++) {
108 cycles_per_sec = max(cycles_per_sec, l1-l0);
112 printf("%Ld cycles/sec\n", cycles_per_sec);
/*
 * Client side of the benchmark. Resolves TARGET_HOSTNAME, connects to TCP
 * port 1111 and times several ways of pushing data into the socket:
 *   r1     - write() of the static buffer, NR chunks of BUFSIZE
 *   r2     - read() from "largefile" followed by write(), NR chunks
 *   c1/r3  - sendfile() from "largefile" to the socket
 *   c2/r4  - ssplice() from "largefile" into a pipe, then pipe to socket
 *   c3/r5  - ssplice() from "largefile" straight to the socket
 * The rN values are the rates filled in by end_timing(); the cN values are
 * its return values. The closing printf calls report relative differences
 * between the sendfile, splice and splice-pipe passes.
 *
 * Returns the value of error() on every visible failure path.
 * NOTE(review): loop headers, labels, several declarations and the final
 * return are not visible in this listing. The variable i appears to hold
 * the number of bytes still to send in the sendfile and splice passes -
 * confirm against the complete file.
 * NOTE(review): the splice-pipe pass reports NR*BUFSIZE*SPLICE_LOOPS bytes
 * although it is driven by splice_pipe_loops. Both constants are 10, so the
 * figure is the same today, but SPLICE_PIPE_LOOPS looks like the intended
 * constant.
 * NOTE(review): sk and fd are reassigned for each pass and no close() call
 * is visible - confirm that the earlier descriptors are not leaked.
 * NOTE(review): the message "splice is ... more efficient splice-pipe" is
 * missing the word "than"; BUFSIZE is unsigned but printed with "%d".
 */
115 static int child(void)
117 static char buffer[BUFSIZE];
121 struct sockaddr_in s_to;
123 double r1, r2, r3, r4, r5;
128 r1 = r2 = r3 = r4 = r5 = 0;
130 sk = socket(PF_INET, SOCK_STREAM, 0);
132 return error("socket");
133 hp = gethostbyname (TARGET_HOSTNAME);
135 bzero ((char *) &s_to, sizeof (s_to));
136 bcopy ((char *) hp->h_addr, (char *) &(s_to.sin_addr), hp->h_length);
137 s_to.sin_family = hp->h_addrtype;
138 s_to.sin_port = htons(1111);
142 fprintf(stdout, "BUFSIZE = %d\n", BUFSIZE);
145 if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0)
146 return error("connect");
148 start_timing("Empty buffer");
149 for (i = 0; i < NR; i++) {
150 if (write(sk, buffer, BUFSIZE) != BUFSIZE)
151 return error("empty buffer write");
153 end_timing(NR*BUFSIZE, &r1);
155 fd = open("largefile", O_RDONLY);
157 return error("largefile");
159 start_timing("Read/write loop");
160 for (i = 0; i < NR; i++) {
161 if (read(fd, buffer, BUFSIZE) != BUFSIZE)
162 return error("largefile read");
163 if (write(sk, buffer, BUFSIZE) != BUFSIZE)
164 return error("largefile write");
166 end_timing(NR*BUFSIZE, &r2);
170 start_timing("sendfile");
172 sk = socket(PF_INET, SOCK_STREAM, 0);
173 if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0)
174 return error("connect");
176 fd = open("largefile", O_RDONLY);
178 return error("largefile");
182 int ret = sendfile(sk, fd, NULL, i);
188 if (--sendfile_loops)
190 c1 = end_timing(NR*BUFSIZE*SENDFILE_LOOPS, &r3);
192 start_timing("splice-pipe");
194 sk = socket(PF_INET, SOCK_STREAM, 0);
195 if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0)
196 return error("connect");
198 fd = open("largefile", O_RDONLY);
200 return error("largefile");
201 if (pipe(pipefd) < 0)
202 return error("pipe");
207 int ret = ssplice(fd, &off, pipefd[1], NULL, min(i, BUFSIZE), SPLICE_F_NONBLOCK);
209 return error("splice-pipe-in");
212 int flags = i ? SPLICE_F_MORE : 0;
213 int written = ssplice(pipefd[0], NULL, sk, NULL, ret, flags);
215 return error("splice-pipe-out");
224 if (--splice_pipe_loops)
225 goto splice_pipe_again;
226 c2 = end_timing(NR*BUFSIZE*SPLICE_LOOPS, &r4);
229 * Direct splicing was disabled as being immediately available,
230 * it's reserved for sendfile emulation now.
233 start_timing("splice");
235 sk = socket(PF_INET, SOCK_STREAM, 0);
236 if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0)
237 return error("connect");
239 fd = open("largefile", O_RDONLY);
241 return error("largefile");
246 int flags = BUFSIZE < i ? SPLICE_F_MORE : 0;
249 ret = ssplice(fd, &off, sk, NULL, min(i, BUFSIZE), flags);
252 return error("splice");
260 c3 = end_timing(NR*BUFSIZE*SPLICE_LOOPS, &r5);
267 * c2/r4 - splice-pipe
272 printf("sendfile is %.2f%% more efficient than splice-pipe.\n",
273 (c2 - c1) / c1 * 100.0 );
275 printf("sendfile is %.2f%% more efficient than splice.\n",
276 (c3 - c1) / c1 * 100.0 );
278 printf("splice is %.2f%% more efficient splice-pipe.\n",
279 (c2 - c3) / c3 * 100.0 );
281 printf("sendfile is %.2f%% faster than splice-pipe.\n",
282 (r3 - r4) / r4 * 100.0 );
284 printf("sendfile is %.2f%% faster than splice.\n",
285 (r3 - r5) / r5 * 100.0 );
287 printf("splice is %.2f%% faster than splice-pipe.\n",
288 (r5 - r4) / r4 * 100.0 );
/*
 * Set up the shared counter page that cycles points to.
 *
 * Writes 4096 zero bytes to ".tmp_mmap" and maps the file MAP_SHARED with
 * read and write access, storing the mapping in cycles. BUG_ON triggers
 * when mmap returns (void *)-1.
 * NOTE(review): the file is first creat()ed and then open()ed again; the
 * lines in between are not visible in this listing, so it cannot be told
 * from here whether the first descriptor is closed or whether the write()
 * result is checked - confirm against the complete file.
 */
294 static void setup_shared_var(void)
296 char zerobuff [4096] = { 0, };
299 fd = creat(".tmp_mmap", 0700);
303 fd = open(".tmp_mmap", O_RDWR|O_CREAT|O_TRUNC, 0644);
305 ret = write(fd, zerobuff, 4096);
308 cycles = (void *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
309 BUG_ON(cycles == (void *)-1);
/*
 * SCHED_BATCH is the policy value passed to sched_setscheduler() in
 * lowprio_cycle_soak_loop().
 *
 * rdtscll(val) stores a cycle-counter reading into val and has one variant
 * per architecture: i386 and x86_64 execute the rdtsc instruction (x86_64
 * combines its two outputs as (hi << 32) | lo), ia64 reads
 * *__mm_clock_dev, and the last variant stores 0 into val.
 * NOTE(review): the last variant is presumably the #else fallback; the
 * #else and #endif lines, the declarations of hi and lo, and the wrappers
 * around the macro bodies are not visible in this listing - confirm against
 * the complete file.
 */
314 #define SCHED_BATCH 3
316 #if defined(__i386__)
317 #define rdtscll(val) \
319 __asm__ __volatile__("rdtsc" : "=A" (val)); \
321 #elif defined(__x86_64__)
322 #define rdtscll(val) \
325 __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi)); \
326 (val) = (hi << 32) | lo; \
329 #elif defined(__ia64__)
330 #define rdtscll(val) \
332 val = *__mm_clock_dev; \
336 #define rdtscll(val) \
337 do { (val) = 0LL; } while (0)
341 * Keep lowprio looping - to measure the number of idle cycles
342 * available. It's tricky: we do a series of RDTSC calls, and
343 * if the delay to the last measurement was less than 500 cycles,
344 * we conclude that only this loop ran.
/*
 * Body of the low-priority soak task.
 *
 * Switches the calling task to the SCHED_BATCH policy with sched_priority 0
 * (BUG_ON triggers if sched_setscheduler fails) and then enters the
 * measurement loop.
 * NOTE(review): cycles is a pointer, so "cycles >= 0" compares a pointer
 * with 0 and reads as an always-true condition - confirm that an endless
 * loop is what is intended.
 * NOTE(review): "sched_priority: 0" is the old GNU designated-initializer
 * spelling; ".sched_priority = 0" is the standard C99 form.
 * NOTE(review): the loop body that uses t0, t1 and delta is not visible in
 * this listing.
 */
346 static void lowprio_cycle_soak_loop(void)
348 struct sched_param p = { sched_priority: 0 };
349 unsigned long long t0, t1, delta;
352 * We are a nice +19 SCHED_BATCH task:
354 BUG_ON(sched_setscheduler(0, SCHED_BATCH, &p) != 0);
359 while (cycles >= 0) {
368 int main(__attribute__((__unused__)) int argc, __attribute__((__unused__)) char **argv)
374 signal(SIGCHLD, SIG_IGN);
378 lowprio_cycle_soak_loop();