| 1 | #include <assert.h> |
| 2 | #include <ctype.h> |
| 3 | #include <errno.h> |
| 4 | #include <fcntl.h> |
| 5 | #include <malloc.h> |
| 6 | #include <netdb.h> |
| 7 | #include <netinet/in.h> |
| 8 | #include <netinet/tcp.h> |
| 9 | #include <sched.h> |
| 10 | #include <signal.h> |
| 11 | #include <stdio.h> |
| 12 | #include <stdlib.h> |
| 13 | #include <string.h> |
| 14 | #include <sys/mman.h> |
| 15 | #include <sys/sendfile.h> |
| 16 | #include <sys/socket.h> |
| 17 | #include <sys/time.h> |
| 18 | #include <sys/types.h> |
| 19 | #include <sys/wait.h> |
| 20 | #include <time.h> |
| 21 | #include <unistd.h> |
| 22 | |
| 23 | #include "splice.h" |
| 24 | |
/* Host the benchmark client resolves and connects to. */
#define TARGET_HOSTNAME "localhost"

/* Bytes pushed per pass, and the size of each individual transfer. */
#define BYTES (128*1024*1024UL)
#define BUFSIZE (64*1024U)

/* Number of BUFSIZE-sized transfers that make up one pass. */
#define NR (BYTES/BUFSIZE)

/* Number of passes for each benchmark variant. */
#define SENDFILE_LOOPS 10
#define SPLICE_LOOPS 10
#define SPLICE_PIPE_LOOPS 10

/* Remaining-pass counters; child() decrements them down to zero. */
static int sendfile_loops = SENDFILE_LOOPS;
static int splice_pipe_loops = SPLICE_PIPE_LOOPS;
#if 0
static int splice_loops = SPLICE_LOOPS;
#endif

/* Idle-cycle counter living in the shared mapping set up at startup. */
static volatile long long *cycles;
/* Idle cycles per second, as measured by calibrate_loops(). */
static volatile long long cycles_per_sec;

/* Values recorded by start_timing() and consumed by end_timing(). */
static struct timeval start_time;
static double start_cycles;
static double cpu_pct;
| 47 | |
| 48 | static void start_timing(const char *desc) |
| 49 | { |
| 50 | printf("%-20s: ", desc); |
| 51 | fflush(stdout); |
| 52 | gettimeofday(&start_time, NULL); |
| 53 | /* |
| 54 | * Give the lowprio cycles thread a chance to run and thus |
| 55 | * we get an accurate timestamp: |
| 56 | */ |
| 57 | sched_yield(); |
| 58 | start_cycles = (double)*cycles; |
| 59 | } |
| 60 | |
| 61 | static double end_timing(unsigned long long bytes, double *rate) |
| 62 | { |
| 63 | static long long total; |
| 64 | struct timeval end_time; |
| 65 | double usecs; |
| 66 | double end_cycles, cpu_cycles; |
| 67 | |
| 68 | gettimeofday(&end_time, NULL); |
| 69 | end_cycles = (double)*cycles; |
| 70 | |
| 71 | usecs = (double) (end_time.tv_sec - start_time.tv_sec); |
| 72 | usecs *= 1000000.0; |
| 73 | usecs += (double) (end_time.tv_usec - start_time.tv_usec); |
| 74 | total += bytes; |
| 75 | |
| 76 | cpu_cycles = end_cycles - start_cycles; |
| 77 | cpu_pct = 100.0 - |
| 78 | cpu_cycles / cycles_per_sec / ( usecs / 1000000.0 ) * 100.0; |
| 79 | |
| 80 | *rate = (double) bytes / usecs / (1024*1024) * 1000000; |
| 81 | |
| 82 | printf("%.2fMB/s (%.1fMB total, %.2f%% CPU)\n", *rate, |
| 83 | (double) total / (1024*1024), |
| 84 | cpu_pct |
| 85 | ); |
| 86 | |
| 87 | return cpu_pct; |
| 88 | } |
| 89 | |
| 90 | static void calibrate_loops(void) |
| 91 | { |
| 92 | long long l0, l1; |
| 93 | int i; |
| 94 | |
| 95 | cycles_per_sec = 0; |
| 96 | printf("calibrating cycles: "); fflush(stdout); |
| 97 | |
| 98 | /* |
| 99 | * Make sure we start on a precise timer IRQ boundary: |
| 100 | */ |
| 101 | usleep(50000); |
| 102 | |
| 103 | for (i = 0; i < 10; i++) { |
| 104 | sched_yield(); |
| 105 | l0 = *cycles; |
| 106 | usleep(200000); |
| 107 | l1 = *cycles; |
| 108 | cycles_per_sec = max(cycles_per_sec, l1-l0); |
| 109 | } |
| 110 | cycles_per_sec *= 5; |
| 111 | |
| 112 | printf("%Ld cycles/sec\n", cycles_per_sec); |
| 113 | } |
| 114 | |
| 115 | static int child(void) |
| 116 | { |
| 117 | static char buffer[BUFSIZE]; |
| 118 | int sk; |
| 119 | double c1, c2, c3; |
| 120 | int fd; |
| 121 | struct sockaddr_in s_to; |
| 122 | struct hostent *hp; |
| 123 | double r1, r2, r3, r4, r5; |
| 124 | unsigned int i; |
| 125 | int pipefd[2]; |
| 126 | loff_t off = 0; |
| 127 | |
| 128 | r1 = r2 = r3 = r4 = r5 = 0; |
| 129 | |
| 130 | sk = socket(PF_INET, SOCK_STREAM, 0); |
| 131 | if (!sk) |
| 132 | return error("socket"); |
| 133 | hp = gethostbyname (TARGET_HOSTNAME); |
| 134 | BUG_ON(!hp); |
| 135 | bzero ((char *) &s_to, sizeof (s_to)); |
| 136 | bcopy ((char *) hp->h_addr, (char *) &(s_to.sin_addr), hp->h_length); |
| 137 | s_to.sin_family = hp->h_addrtype; |
| 138 | s_to.sin_port = htons(1111); |
| 139 | |
| 140 | calibrate_loops(); |
| 141 | |
| 142 | fprintf(stdout, "BUFSIZE = %d\n", BUFSIZE); |
| 143 | fflush(stdout); |
| 144 | |
| 145 | if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0) |
| 146 | return error("connect"); |
| 147 | |
| 148 | start_timing("Empty buffer"); |
| 149 | for (i = 0; i < NR; i++) { |
| 150 | if (write(sk, buffer, BUFSIZE) != BUFSIZE) |
| 151 | return error("empty buffer write"); |
| 152 | } |
| 153 | end_timing(NR*BUFSIZE, &r1); |
| 154 | |
| 155 | fd = open("largefile", O_RDONLY); |
| 156 | if (fd < 0) |
| 157 | return error("largefile"); |
| 158 | |
| 159 | start_timing("Read/write loop"); |
| 160 | for (i = 0; i < NR; i++) { |
| 161 | if (read(fd, buffer, BUFSIZE) != BUFSIZE) |
| 162 | return error("largefile read"); |
| 163 | if (write(sk, buffer, BUFSIZE) != BUFSIZE) |
| 164 | return error("largefile write"); |
| 165 | } |
| 166 | end_timing(NR*BUFSIZE, &r2); |
| 167 | close(fd); |
| 168 | close(sk); |
| 169 | |
| 170 | start_timing("sendfile"); |
| 171 | sendfile_again: |
| 172 | sk = socket(PF_INET, SOCK_STREAM, 0); |
| 173 | if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0) |
| 174 | return error("connect"); |
| 175 | |
| 176 | fd = open("largefile", O_RDONLY); |
| 177 | if (fd < 0) |
| 178 | return error("largefile"); |
| 179 | |
| 180 | i = NR*BUFSIZE; |
| 181 | do { |
| 182 | int ret = sendfile(sk, fd, NULL, i); |
| 183 | i -= ret; |
| 184 | } while (i); |
| 185 | |
| 186 | close(fd); |
| 187 | close(sk); |
| 188 | if (--sendfile_loops) |
| 189 | goto sendfile_again; |
| 190 | c1 = end_timing(NR*BUFSIZE*SENDFILE_LOOPS, &r3); |
| 191 | |
| 192 | start_timing("splice-pipe"); |
| 193 | splice_pipe_again: |
| 194 | sk = socket(PF_INET, SOCK_STREAM, 0); |
| 195 | if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0) |
| 196 | return error("connect"); |
| 197 | |
| 198 | fd = open("largefile", O_RDONLY); |
| 199 | if (fd < 0) |
| 200 | return error("largefile"); |
| 201 | if (pipe(pipefd) < 0) |
| 202 | return error("pipe"); |
| 203 | |
| 204 | i = NR*BUFSIZE; |
| 205 | off = 0; |
| 206 | do { |
| 207 | int ret = ssplice(fd, &off, pipefd[1], NULL, min(i, BUFSIZE), SPLICE_F_NONBLOCK); |
| 208 | if (ret <= 0) |
| 209 | return error("splice-pipe-in"); |
| 210 | i -= ret; |
| 211 | while (ret > 0) { |
| 212 | int flags = i ? SPLICE_F_MORE : 0; |
| 213 | int written = ssplice(pipefd[0], NULL, sk, NULL, ret, flags); |
| 214 | if (written <= 0) |
| 215 | return error("splice-pipe-out"); |
| 216 | ret -= written; |
| 217 | } |
| 218 | } while (i); |
| 219 | |
| 220 | close(fd); |
| 221 | close(sk); |
| 222 | close(pipefd[0]); |
| 223 | close(pipefd[1]); |
| 224 | if (--splice_pipe_loops) |
| 225 | goto splice_pipe_again; |
| 226 | c2 = end_timing(NR*BUFSIZE*SPLICE_LOOPS, &r4); |
| 227 | |
| 228 | /* |
| 229 | * Direct splicing was disabled as being immediately available, |
| 230 | * it's reserved for sendfile emulation now. |
| 231 | */ |
| 232 | #if 0 |
| 233 | start_timing("splice"); |
| 234 | splice_again: |
| 235 | sk = socket(PF_INET, SOCK_STREAM, 0); |
| 236 | if (connect(sk, (struct sockaddr *)&s_to, sizeof(s_to)) < 0) |
| 237 | return error("connect"); |
| 238 | |
| 239 | fd = open("largefile", O_RDONLY); |
| 240 | if (fd < 0) |
| 241 | return error("largefile"); |
| 242 | |
| 243 | i = NR*BUFSIZE; |
| 244 | off = 0; |
| 245 | do { |
| 246 | int flags = BUFSIZE < i ? SPLICE_F_MORE : 0; |
| 247 | int ret; |
| 248 | |
| 249 | ret = ssplice(fd, &off, sk, NULL, min(i, BUFSIZE), flags); |
| 250 | |
| 251 | if (ret <= 0) |
| 252 | return error("splice"); |
| 253 | i -= ret; |
| 254 | } while (i); |
| 255 | |
| 256 | close(fd); |
| 257 | close(sk); |
| 258 | if (--splice_loops) |
| 259 | goto splice_again; |
| 260 | c3 = end_timing(NR*BUFSIZE*SPLICE_LOOPS, &r5); |
| 261 | #else |
| 262 | c3 = 0; |
| 263 | #endif |
| 264 | |
| 265 | /* |
| 266 | * c1/r3 - sendfile |
| 267 | * c2/r4 - splice-pipe |
| 268 | * c3/r5 - splice |
| 269 | */ |
| 270 | |
| 271 | if (c1 && c2) |
| 272 | printf("sendfile is %.2f%% more efficient than splice-pipe.\n", |
| 273 | (c2 - c1) / c1 * 100.0 ); |
| 274 | if (c1 && c3) |
| 275 | printf("sendfile is %.2f%% more efficient than splice.\n", |
| 276 | (c3 - c1) / c1 * 100.0 ); |
| 277 | if (c2 && c3) |
| 278 | printf("splice is %.2f%% more efficient splice-pipe.\n", |
| 279 | (c2 - c3) / c3 * 100.0 ); |
| 280 | if (r3 && r4) |
| 281 | printf("sendfile is %.2f%% faster than splice-pipe.\n", |
| 282 | (r3 - r4) / r4 * 100.0 ); |
| 283 | if (r3 && r5) |
| 284 | printf("sendfile is %.2f%% faster than splice.\n", |
| 285 | (r3 - r5) / r5 * 100.0 ); |
| 286 | if (r4 && r5) |
| 287 | printf("splice is %.2f%% faster than splice-pipe.\n", |
| 288 | (r5 - r4) / r4 * 100.0 ); |
| 289 | |
| 290 | return 0; |
| 291 | } |
| 292 | |
| 293 | |
| 294 | static void setup_shared_var(void) |
| 295 | { |
| 296 | char zerobuff [4096] = { 0, }; |
| 297 | int ret, fd; |
| 298 | |
| 299 | fd = creat(".tmp_mmap", 0700); |
| 300 | BUG_ON(fd == -1); |
| 301 | close(fd); |
| 302 | |
| 303 | fd = open(".tmp_mmap", O_RDWR|O_CREAT|O_TRUNC, 0644); |
| 304 | BUG_ON(fd == -1); |
| 305 | ret = write(fd, zerobuff, 4096); |
| 306 | BUG_ON(ret != 4096); |
| 307 | |
| 308 | cycles = (void *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); |
| 309 | BUG_ON(cycles == (void *)-1); |
| 310 | |
| 311 | close(fd); |
| 312 | } |
| 313 | |
/* Scheduling policy number handed to sched_setscheduler() by the soak loop. */
#define SCHED_BATCH 3

/*
 * rdtscll(val): store a timestamp-counter reading in the lvalue `val`.
 *
 * x86-64 and i386 execute the RDTSC instruction. Every other
 * architecture (no ia64 implementation is enabled) gets a fallback that
 * always stores 0.
 */
#if defined(__x86_64__)
#define rdtscll(val)							\
do {									\
	uint64_t tsc_lo, tsc_hi;					\
	__asm__ __volatile__("rdtsc" : "=a" (tsc_lo), "=d" (tsc_hi));	\
	(val) = (tsc_hi << 32) | tsc_lo;				\
} while (0)
#elif defined(__i386__)
#define rdtscll(val)							\
do {									\
	__asm__ __volatile__("rdtsc" : "=A" (val));			\
} while (0)
#else
#define rdtscll(val)							\
do {									\
	(val) = 0LL;							\
} while (0)
#endif
| 339 | |
| 340 | /* |
| 341 | * Keep lowprio looping - to meausure the number of idle cycles |
| 342 | * available. It's tricky: we do a series of RDTSC calls, and |
| 343 | * if the delay to the last measurement was less than 500 cycles, |
| 344 | * we conclude that only this loop ran. |
| 345 | */ |
| 346 | static void lowprio_cycle_soak_loop(void) |
| 347 | { |
| 348 | struct sched_param p = { sched_priority: 0 }; |
| 349 | unsigned long long t0, t1, delta; |
| 350 | |
| 351 | /* |
| 352 | * We are a nice +19 SCHED_BATCH task: |
| 353 | */ |
| 354 | BUG_ON(sched_setscheduler(0, SCHED_BATCH, &p) != 0); |
| 355 | if (nice(40) < 0) |
| 356 | perror("nice"); |
| 357 | |
| 358 | rdtscll(t0); |
| 359 | while (cycles >= 0) { |
| 360 | rdtscll(t1); |
| 361 | delta = t1-t0; |
| 362 | if (delta < 500) |
| 363 | *cycles += delta; |
| 364 | t0 = t1; |
| 365 | } |
| 366 | } |
| 367 | |
/*
 * Entry point. Sets up the shared cycle counter, forks the low-priority
 * soak process that feeds it, raises this process's priority and runs
 * the benchmark in child(). The soak process is sent SIGHUP afterwards.
 *
 * Exit status is EXIT_SUCCESS when child() returned 0, EXIT_FAILURE
 * when it did not or when fork() failed. Command-line arguments are
 * ignored.
 */
int main(__attribute__((__unused__)) int argc, __attribute__((__unused__)) char **argv)
{
	pid_t pid;
	int ret;

	setup_shared_var();

	signal(SIGCHLD, SIG_IGN);

	pid = fork();
	if (pid < 0) {
		/*
		 * Bail out: continuing would later call kill(-1, SIGHUP),
		 * which signals every process we are allowed to signal.
		 */
		perror("fork");
		exit(EXIT_FAILURE);
	}
	if (!pid) {
		lowprio_cycle_soak_loop();
		exit(0);
	}

	/*
	 * On success nice() returns the new nice value, which is negative
	 * here, so failure has to be detected through errno.
	 */
	errno = 0;
	if (nice(-20) == -1 && errno != 0)
		perror("nice");

	ret = child();
	kill(pid, SIGHUP);
	exit(ret ? EXIT_FAILURE : EXIT_SUCCESS);
}