[PATCH] blktrace: only print one connection info per client
[blktrace.git] / blktrace.c
1 /*
2  * block queue tracing application
3  *
4  * Copyright (C) 2005 Jens Axboe <axboe@suse.de>
5  *
6  *  This program is free software; you can redistribute it and/or modify
7  *  it under the terms of the GNU General Public License as published by
8  *  the Free Software Foundation; either version 2 of the License, or
9  *  (at your option) any later version.
10  *
11  *  This program is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU General Public License for more details.
15  *
16  *  You should have received a copy of the GNU General Public License
17  *  along with this program; if not, write to the Free Software
18  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  */
21 #include <pthread.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <locale.h>
26 #include <signal.h>
27 #include <fcntl.h>
28 #include <string.h>
29 #include <sys/ioctl.h>
30 #include <sys/param.h>
31 #include <sys/statfs.h>
32 #include <sys/poll.h>
33 #include <sys/mman.h>
34 #include <sys/socket.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <sched.h>
38 #include <ctype.h>
39 #include <getopt.h>
40 #include <errno.h>
41 #include <netinet/in.h>
42 #include <arpa/inet.h>
43 #include <netdb.h>
44 #include <sys/sendfile.h>
45
46 #include "blktrace.h"
47 #include "barrier.h"
48
static char blktrace_version[] = "0.99.1";

/*
 * You may want to increase this even more, if you are logging at a high
 * rate and see skipped/missed events
 */
#define BUF_SIZE        (512 * 1024)    /* size of each relay sub-buffer */
#define BUF_NR          (4)             /* sub-buffers per cpu */

/* stdio buffer size handed to setvbuf() for per-cpu output files */
#define OFILE_BUF       (128 * 1024)

/* statfs() f_type of a debugfs mount ("dbg " as little-endian bytes) */
#define DEBUGFS_TYPE    0x64626720

/* short options; must stay in sync with l_opts[] below */
#define S_OPTS  "d:a:A:r:o:kw:Vb:n:D:lh:p:s"
/*
 * long option table; each entry maps to the matching short option
 * in the S_OPTS string above
 */
static struct option l_opts[] = {
	{ .name = "dev",		.has_arg = required_argument,	.flag = NULL,	.val = 'd' },
	{ .name = "act-mask",		.has_arg = required_argument,	.flag = NULL,	.val = 'a' },
	{ .name = "set-mask",		.has_arg = required_argument,	.flag = NULL,	.val = 'A' },
	{ .name = "relay",		.has_arg = required_argument,	.flag = NULL,	.val = 'r' },
	{ .name = "output",		.has_arg = required_argument,	.flag = NULL,	.val = 'o' },
	{ .name = "kill",		.has_arg = no_argument,		.flag = NULL,	.val = 'k' },
	{ .name = "stopwatch",		.has_arg = required_argument,	.flag = NULL,	.val = 'w' },
	{ .name = "version",		.has_arg = no_argument,		.flag = NULL,	.val = 'V' },
	{ .name = "buffer-size",	.has_arg = required_argument,	.flag = NULL,	.val = 'b' },
	{ .name = "num-sub-buffers",	.has_arg = required_argument,	.flag = NULL,	.val = 'n' },
	{ .name = "output-dir",		.has_arg = required_argument,	.flag = NULL,	.val = 'D' },
	{ .name = "listen",		.has_arg = no_argument,		.flag = NULL,	.val = 'l' },
	{ .name = "host",		.has_arg = required_argument,	.flag = NULL,	.val = 'h' },
	{ .name = "port",		.has_arg = required_argument,	.flag = NULL,	.val = 'p' },
	{ .name = "no-sendfile",	.has_arg = no_argument,		.flag = NULL,	.val = 's' },
	{ .name = NULL, }
};
158
/* one captured relay sub-buffer, staged for writeout by the main thread */
struct tip_subbuf {
	void *buf;		/* trace data */
	unsigned int len;	/* bytes actually used */
	unsigned int max_len;	/* bytes allocated */
};

#define FIFO_SIZE       (1024)  /* should be plenty big! */
#define CL_SIZE         (128)   /* cache line, any bigger? */

/*
 * single-producer/single-consumer ring of sub-buffers; head and tail are
 * kept on separate cache lines to avoid false sharing between the reader
 * thread (producer) and the writeout thread (consumer)
 */
struct tip_subbuf_fifo {
	int tail __attribute__((aligned(CL_SIZE)));
	int head __attribute__((aligned(CL_SIZE)));
	struct tip_subbuf *q[FIFO_SIZE];
};
173
/*
 * per-cpu trace reader state; one instance (and one pthread) per cpu
 * per traced device
 */
struct thread_information {
	int cpu;
	pthread_t thread;

	int fd;				/* relay channel fd in debugfs */
	void *fd_buf;
	char fn[MAXPATHLEN + 64];	/* path of the relay channel file */

	FILE *ofile;			/* output stream (file or stdout) */
	char *ofile_buffer;		/* stdio buffer given to setvbuf() */
	off_t ofile_offset;
	int ofile_stdout;		/* non-zero when piping to stdout */
	int ofile_mmap;			/* non-zero when using mmap'ed output */

	/* per-mode strategy hooks, wired up by fill_ops() */
	int (*get_subbuf)(struct thread_information *, unsigned int);
	int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
	int (*read_data)(struct thread_information *, void *, unsigned int);

	unsigned long events_processed;
	unsigned long long data_read;
	unsigned long long data_queued;
	struct device_information *device;

	int exited;			/* set by the thread on its way out */

	/*
	 * piped fifo buffers
	 */
	struct tip_subbuf_fifo fifo;
	struct tip_subbuf *leftover_ts;	/* partial event kept between flushes */

	/*
	 * mmap controlled output files
	 */
	unsigned long long fs_size;	/* bytes written so far */
	unsigned long long fs_max_size;	/* size the file has been truncated to */
	unsigned long fs_off;		/* write offset inside fs_buf */
	void *fs_buf;			/* current mmap window */
	unsigned long fs_buf_len;	/* length of fs_buf */

	struct net_connection *nc;	/* server mode: owning connection */
};

/* one traced block device */
struct device_information {
	int fd;				/* device fd used for the trace ioctls */
	char *path;			/* device path from the command line */
	char buts_name[32];		/* trace name assigned by the kernel */
	volatile int trace_started;
	unsigned long drop_count;	/* dropped events, read at shutdown */
	struct thread_information *threads;	/* one per cpu */

	/* server mode: which client host/connection this device belongs to */
	struct cl_host *ch;
	u32 cl_id;
	time_t cl_connect_time;
};
229
static int ncpus;
static struct thread_information *thread_information;
static int ndevs;
static struct device_information *device_information;

/* command line option globals */
static char *debugfs_path;
static char *output_name;
static char *output_dir;
static int act_mask = ~0U;		/* trace all actions by default */
static int kill_running_trace;
static unsigned long buf_size = BUF_SIZE;
static unsigned long buf_nr = BUF_NR;
static unsigned int page_size;

/* volatile re-read on every check; flags are set from the signal handler */
#define is_done()       (*(volatile int *)(&done))
static volatile int done;

#define is_trace_stopped()      (*(volatile int *)(&trace_stopped))
static volatile int trace_stopped;

#define is_stat_shown() (*(volatile int *)(&stat_shown))
static volatile int stat_shown;

int data_is_native = -1;

static void exit_trace(int status);

#define dip_tracing(dip)        (*(volatile int *)(&(dip)->trace_started))
#define dip_set_tracing(dip, v) ((dip)->trace_started = (v))

/* iterators over devices, per-cpu threads and connected client hosts */
#define __for_each_dip(__d, __di, __e, __i)     \
        for (__i = 0, __d = __di; __i < __e; __i++, __d++)

#define for_each_dip(__d, __i)          \
        __for_each_dip(__d, device_information, ndevs, __i)
#define for_each_nc_dip(__nc, __d, __i)         \
        __for_each_dip(__d, (__nc)->ch->device_information, (__nc)->ch->ndevs, __i)

#define __for_each_tip(__d, __t, __ncpus, __j)  \
        for (__j = 0, __t = (__d)->threads; __j < __ncpus; __j++, __t++)
#define for_each_tip(__d, __t, __j)     \
        __for_each_tip(__d, __t, ncpus, __j)
#define for_each_cl_host(__c)   \
        for (__c = cl_host_list; __c; __c = __c->list_next)
275
/*
 * networking stuff follows. we include a magic number so we know whether
 * to endianness convert or not
 */
struct blktrace_net_hdr {
	u32 magic;              /* same as trace magic */
	char buts_name[32];     /* trace name */
	u32 cpu;                /* for which cpu */
	u32 max_cpus;
	u32 len;                /* length of following trace data */
	u32 cl_id;              /* id for set of client per-cpu connections */
};

#define TRACE_NET_PORT          (8462)

/* net_mode values */
enum {
	Net_none = 0,
	Net_server,
	Net_client,
};

/*
 * network cmd line params
 */
static char hostname[MAXHOSTNAMELEN];
static int net_port = TRACE_NET_PORT;
static int net_mode = 0;
static int net_use_sendfile = 1;

/* one remote client host, possibly holding several connections */
struct cl_host {
	struct cl_host *list_next;
	struct in_addr cl_in_addr;
	struct net_connection *net_connections;
	int nconn;
	struct device_information *device_information;
	int ndevs;
	int ncpus;
	int ndevs_done;
};

/* a single accepted connection (server mode) */
struct net_connection {
	int in_fd;
	struct pollfd pfd;
	time_t connect_time;
	struct cl_host *ch;
	int ncpus;
};

#define NET_MAX_CL_HOSTS        (1024)
static struct cl_host *cl_host_list;
static int cl_hosts;
static int net_connects;

static int *net_out_fd;		/* client mode: one outgoing fd per cpu */
330
331 static void handle_sigint(__attribute__((__unused__)) int sig)
332 {
333         struct device_information *dip;
334         int i;
335
336         /*
337          * stop trace so we can reap currently produced data
338          */
339         for_each_dip(dip, i) {
340                 if (dip->fd == -1)
341                         continue;
342                 if (ioctl(dip->fd, BLKTRACESTOP) < 0)
343                         perror("BLKTRACESTOP");
344         }
345
346         done = 1;
347 }
348
349 static int get_dropped_count(const char *buts_name)
350 {
351         int fd;
352         char tmp[MAXPATHLEN + 64];
353
354         snprintf(tmp, sizeof(tmp), "%s/block/%s/dropped",
355                  debugfs_path, buts_name);
356
357         fd = open(tmp, O_RDONLY);
358         if (fd < 0) {
359                 /*
360                  * this may be ok, if the kernel doesn't support dropped counts
361                  */
362                 if (errno == ENOENT)
363                         return 0;
364
365                 fprintf(stderr, "Couldn't open dropped file %s\n", tmp);
366                 return -1;
367         }
368
369         if (read(fd, tmp, sizeof(tmp)) < 0) {
370                 perror(tmp);
371                 close(fd);
372                 return -1;
373         }
374
375         close(fd);
376
377         return atoi(tmp);
378 }
379
380 static int start_trace(struct device_information *dip)
381 {
382         struct blk_user_trace_setup buts;
383
384         memset(&buts, 0, sizeof(buts));
385         buts.buf_size = buf_size;
386         buts.buf_nr = buf_nr;
387         buts.act_mask = act_mask;
388
389         if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
390                 perror("BLKTRACESETUP");
391                 return 1;
392         }
393
394         if (ioctl(dip->fd, BLKTRACESTART) < 0) {
395                 perror("BLKTRACESTART");
396                 return 1;
397         }
398
399         memcpy(dip->buts_name, buts.name, sizeof(dip->buts_name));
400         dip_set_tracing(dip, 1);
401         return 0;
402 }
403
404 static void stop_trace(struct device_information *dip)
405 {
406         if (dip_tracing(dip) || kill_running_trace) {
407                 dip_set_tracing(dip, 0);
408
409                 /*
410                  * should be stopped, just don't complain if it isn't
411                  */
412                 ioctl(dip->fd, BLKTRACESTOP);
413
414                 if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
415                         perror("BLKTRACETEARDOWN");
416
417                 close(dip->fd);
418                 dip->fd = -1;
419         }
420 }
421
422 static void stop_all_traces(void)
423 {
424         struct device_information *dip;
425         int i;
426
427         for_each_dip(dip, i) {
428                 dip->drop_count = get_dropped_count(dip->buts_name);
429                 stop_trace(dip);
430         }
431 }
432
433 static void wait_for_data(struct thread_information *tip, int timeout)
434 {
435         struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
436
437         while (!is_done()) {
438                 if (poll(&pfd, 1, timeout) < 0) {
439                         perror("poll");
440                         break;
441                 }
442                 if (pfd.revents & POLLIN)
443                         break;
444                 if (tip->ofile_stdout)
445                         break;
446         }
447 }
448
449 static int read_data_file(struct thread_information *tip, void *buf,
450                           unsigned int len)
451 {
452         int ret = 0;
453
454         do {
455                 wait_for_data(tip, 100);
456
457                 ret = read(tip->fd, buf, len);
458                 if (!ret)
459                         continue;
460                 else if (ret > 0)
461                         return ret;
462                 else {
463                         if (errno != EAGAIN) {
464                                 perror(tip->fn);
465                                 fprintf(stderr,"Thread %d failed read of %s\n",
466                                         tip->cpu, tip->fn);
467                                 break;
468                         }
469                         continue;
470                 }
471         } while (!is_done());
472
473         return ret;
474
475 }
476
477 static int read_data_net(struct thread_information *tip, void *buf,
478                          unsigned int len)
479 {
480         struct net_connection *nc = tip->nc;
481         unsigned int bytes_left = len;
482         int ret = 0;
483
484         do {
485                 ret = recv(nc->in_fd, buf, bytes_left, MSG_WAITALL);
486
487                 if (!ret)
488                         continue;
489                 else if (ret < 0) {
490                         if (errno != EAGAIN) {
491                                 perror(tip->fn);
492                                 fprintf(stderr, "server: failed read\n");
493                                 return 0;
494                         }
495                         continue;
496                 } else {
497                         buf += ret;
498                         bytes_left -= ret;
499                 }
500         } while (!is_done() && bytes_left);
501
502         return len - bytes_left;
503 }
504
/*
 * Consumer side of the lock-free single-producer/single-consumer fifo:
 * pop the oldest sub-buffer, or return NULL when empty.  Only the
 * writeout thread advances 'head'; the store barrier makes sure the slot
 * is read before the new head becomes visible to the producer.  Do not
 * reorder these statements.
 */
static inline struct tip_subbuf *
subbuf_fifo_dequeue(struct thread_information *tip)
{
	const int head = tip->fifo.head;
	const int next = (head + 1) & (FIFO_SIZE - 1);

	if (head != tip->fifo.tail) {
		struct tip_subbuf *ts = tip->fifo.q[head];

		store_barrier();
		tip->fifo.head = next;
		return ts;
	}

	return NULL;
}
521
/*
 * Producer side of the fifo: append a filled sub-buffer for the writeout
 * thread.  The store barrier publishes the slot contents before the new
 * tail; statement order is load-bearing.  Returns 0 on success, 1 when
 * the ring is full (the sub-buffer is NOT queued in that case).
 */
static inline int subbuf_fifo_queue(struct thread_information *tip,
				    struct tip_subbuf *ts)
{
	const int tail = tip->fifo.tail;
	const int next = (tail + 1) & (FIFO_SIZE - 1);

	if (next != tip->fifo.head) {
		tip->fifo.q[tail] = ts;
		store_barrier();
		tip->fifo.tail = next;
		return 0;
	}

	fprintf(stderr, "fifo too small!\n");
	return 1;
}
538
/*
 * For file output, truncate and mmap the file appropriately
 */
static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
{
	int ofd = fileno(tip->ofile);
	int ret;

	/*
	 * extend file, if we have to. use chunks of 16 subbuffers.
	 */
	if (tip->fs_off + buf_size > tip->fs_buf_len) {
		if (tip->fs_buf) {
			munlock(tip->fs_buf, tip->fs_buf_len);
			munmap(tip->fs_buf, tip->fs_buf_len);
			tip->fs_buf = NULL;
		}

		/*
		 * keep the mapping page-aligned: start the new window at the
		 * last page boundary and carry the intra-page remainder in
		 * fs_off
		 */
		tip->fs_off = tip->fs_size & (page_size - 1);
		tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
		tip->fs_max_size += tip->fs_buf_len;

		if (ftruncate(ofd, tip->fs_max_size) < 0) {
			perror("ftruncate");
			return -1;
		}

		tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
				   MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
		if (tip->fs_buf == MAP_FAILED) {
			perror("mmap");
			return -1;
		}
		/* best effort; an mlock() failure is not fatal */
		mlock(tip->fs_buf, tip->fs_buf_len);
	}

	/* read trace data straight into the mapped file window */
	ret = tip->read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
	if (ret >= 0) {
		tip->data_read += ret;
		tip->fs_size += ret;
		tip->fs_off += ret;
		return 0;
	}

	return -1;
}
585
586 /*
587  * Use the copy approach for pipes and network
588  */
589 static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
590 {
591         struct tip_subbuf *ts = malloc(sizeof(*ts));
592         int ret;
593
594         ts->buf = malloc(buf_size);
595         ts->max_len = maxlen;
596
597         ret = tip->read_data(tip, ts->buf, ts->max_len);
598         if (ret > 0) {
599                 ts->len = ret;
600                 tip->data_read += ret;
601                 if (subbuf_fifo_queue(tip, ts))
602                         return -1;
603         }
604
605         return ret;
606 }
607
608 static void close_thread(struct thread_information *tip)
609 {
610         if (tip->fd != -1)
611                 close(tip->fd);
612         if (tip->ofile)
613                 fclose(tip->ofile);
614         if (tip->ofile_buffer)
615                 free(tip->ofile_buffer);
616         if (tip->fd_buf)
617                 free(tip->fd_buf);
618
619         tip->fd = -1;
620         tip->ofile = NULL;
621         tip->ofile_buffer = NULL;
622         tip->fd_buf = NULL;
623 }
624
625 static void tip_ftrunc_final(struct thread_information *tip)
626 {
627         /*
628          * truncate to right size and cleanup mmap
629          */
630         if (tip->ofile_mmap && tip->ofile) {
631                 int ofd = fileno(tip->ofile);
632
633                 if (tip->fs_buf)
634                         munmap(tip->fs_buf, tip->fs_buf_len);
635
636                 ftruncate(ofd, tip->fs_size);
637         }
638 }
639
640 static void *thread_main(void *arg)
641 {
642         struct thread_information *tip = arg;
643         pid_t pid = getpid();
644         cpu_set_t cpu_mask;
645
646         CPU_ZERO(&cpu_mask);
647         CPU_SET((tip->cpu), &cpu_mask);
648
649         if (sched_setaffinity(pid, sizeof(cpu_mask), &cpu_mask) == -1) {
650                 perror("sched_setaffinity");
651                 exit_trace(1);
652         }
653
654         snprintf(tip->fn, sizeof(tip->fn), "%s/block/%s/trace%d",
655                         debugfs_path, tip->device->buts_name, tip->cpu);
656         tip->fd = open(tip->fn, O_RDONLY);
657         if (tip->fd < 0) {
658                 perror(tip->fn);
659                 fprintf(stderr,"Thread %d failed open of %s\n", tip->cpu,
660                         tip->fn);
661                 exit_trace(1);
662         }
663
664         while (!is_done()) {
665                 if (tip->get_subbuf(tip, buf_size) < 0)
666                         break;
667         }
668
669         /*
670          * trace is stopped, pull data until we get a short read
671          */
672         while (tip->get_subbuf(tip, buf_size) > 0)
673                 ;
674
675         tip_ftrunc_final(tip);
676         tip->exited = 1;
677         return NULL;
678 }
679
/*
 * Send the whole buffer over the socket, retrying on partial sends.
 * Returns 0 on success, 1 on send() failure.
 */
static int write_data_net(int fd, void *buf, unsigned int buf_len)
{
	unsigned int remaining = buf_len;

	while (remaining) {
		int sent = send(fd, buf, remaining, 0);

		if (sent < 0) {
			perror("send");
			return 1;
		}

		buf += sent;
		remaining -= sent;
	}

	return 0;
}
698
699 static int net_send_header(struct thread_information *tip, unsigned int len)
700 {
701         struct blktrace_net_hdr hdr;
702
703         hdr.magic = BLK_IO_TRACE_MAGIC;
704         strcpy(hdr.buts_name, tip->device->buts_name);
705         hdr.cpu = tip->cpu;
706         hdr.max_cpus = ncpus;
707         hdr.len = len;
708         hdr.cl_id = getpid();
709
710         return write_data_net(net_out_fd[tip->cpu], &hdr, sizeof(hdr));
711 }
712
713 /*
714  * send header with 0 length to signal end-of-run
715  */
716 static void net_client_send_close(void)
717 {
718         struct device_information *dip;
719         struct blktrace_net_hdr hdr;
720         int i;
721
722         for_each_dip(dip, i) {
723                 hdr.magic = BLK_IO_TRACE_MAGIC;
724                 hdr.max_cpus = ncpus;
725                 hdr.len = 0;
726                 strcpy(hdr.buts_name, dip->buts_name);
727                 hdr.cpu = get_dropped_count(dip->buts_name);
728                 hdr.cl_id = getpid();
729
730                 write_data_net(net_out_fd[0], &hdr, sizeof(hdr));
731         }
732
733 }
734
735 static int flush_subbuf_net(struct thread_information *tip,
736                             struct tip_subbuf *ts)
737 {
738         if (net_send_header(tip, ts->len))
739                 return -1;
740         if (write_data_net(net_out_fd[tip->cpu], ts->buf, ts->len))
741                 return -1;
742
743         free(ts->buf);
744         free(ts);
745         return 1;
746 }
747
748 static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
749 {
750         int ret = sendfile(net_out_fd[tip->cpu], tip->fd, NULL, ts->len);
751
752         if (ret < 0) {
753                 perror("sendfile");
754                 return 1;
755         } else if (ret < (int) ts->len) {
756                 fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
757                 return 1;
758         }
759
760         return 0;
761 }
762
763 static int flush_subbuf_sendfile(struct thread_information *tip,
764                                  struct tip_subbuf *ts)
765 {
766         int ret = -1;
767
768         if (net_send_header(tip, ts->len))
769                 goto err;
770         if (net_sendfile(tip, ts))
771                 goto err;
772
773         tip->data_read += ts->len;
774         tip->ofile_offset += buf_size;
775         ret = 1;
776 err:
777         free(ts);
778         return ret;
779 }
780
781 static int get_subbuf_sendfile(struct thread_information *tip,
782                                __attribute__((__unused__)) unsigned int maxlen)
783 {
784         struct tip_subbuf *ts;
785         struct stat sb;
786         unsigned int ready;
787
788         wait_for_data(tip, 250);
789
790         if (fstat(tip->fd, &sb) < 0) {
791                 perror("trace stat");
792                 return -1;
793         }
794
795         ready = sb.st_size - tip->data_queued;
796         if (!ready) {
797                 usleep(1000);
798                 return 0;
799         }
800
801         ts = malloc(sizeof(*ts));
802         ts->buf = NULL;
803         ts->max_len = 0;
804         ts->len = ready;
805         tip->data_queued += ready;
806
807         if (flush_subbuf_sendfile(tip, ts) < 0)
808                 return -1;
809
810         return ready;
811 }
812
813 static int write_data(struct thread_information *tip, void *buf,
814                       unsigned int buf_len)
815 {
816         int ret;
817
818         if (!buf_len)
819                 return 0;
820
821         while (1) {
822                 ret = fwrite(buf, buf_len, 1, tip->ofile);
823                 if (ret == 1)
824                         break;
825
826                 if (ret < 0) {
827                         perror("write");
828                         return 1;
829                 }
830         }
831
832         if (tip->ofile_stdout)
833                 fflush(tip->ofile);
834
835         return 0;
836 }
837
838 static int flush_subbuf_file(struct thread_information *tip,
839                              struct tip_subbuf *ts)
840 {
841         unsigned int offset = 0;
842         struct blk_io_trace *t;
843         int pdu_len, events = 0;
844
845         /*
846          * surplus from last run
847          */
848         if (tip->leftover_ts) {
849                 struct tip_subbuf *prev_ts = tip->leftover_ts;
850
851                 if (prev_ts->len + ts->len > prev_ts->max_len) {
852                         prev_ts->max_len += ts->len;
853                         prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
854                 }
855
856                 memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
857                 prev_ts->len += ts->len;
858
859                 free(ts->buf);
860                 free(ts);
861
862                 ts = prev_ts;
863                 tip->leftover_ts = NULL;
864         }
865
866         while (offset + sizeof(*t) <= ts->len) {
867                 t = ts->buf + offset;
868
869                 if (verify_trace(t)) {
870                         write_data(tip, ts->buf, offset);
871                         return -1;
872                 }
873
874                 pdu_len = t->pdu_len;
875
876                 if (offset + sizeof(*t) + pdu_len > ts->len)
877                         break;
878
879                 offset += sizeof(*t) + pdu_len;
880                 tip->events_processed++;
881                 tip->data_read += sizeof(*t) + pdu_len;
882                 events++;
883         }
884
885         if (write_data(tip, ts->buf, offset))
886                 return -1;
887
888         /*
889          * leftover bytes, save them for next time
890          */
891         if (offset != ts->len) {
892                 tip->leftover_ts = ts;
893                 ts->len -= offset;
894                 memmove(ts->buf, ts->buf + offset, ts->len);
895         } else {
896                 free(ts->buf);
897                 free(ts);
898         }
899
900         return events;
901 }
902
903 static int write_tip_events(struct thread_information *tip)
904 {
905         struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
906
907         if (ts)
908                 return tip->flush_subbuf(tip, ts);
909
910         return 0;
911 }
912
913 /*
914  * scans the tips we know and writes out the subbuffers we accumulate
915  */
916 static void get_and_write_events(void)
917 {
918         struct device_information *dip;
919         struct thread_information *tip;
920         int i, j, events, ret, tips_running;
921
922         while (!is_done()) {
923                 events = 0;
924
925                 for_each_dip(dip, i) {
926                         for_each_tip(dip, tip, j) {
927                                 ret = write_tip_events(tip);
928                                 if (ret > 0)
929                                         events += ret;
930                         }
931                 }
932
933                 if (!events)
934                         usleep(100000);
935         }
936
937         /*
938          * reap stored events
939          */
940         do {
941                 events = 0;
942                 tips_running = 0;
943                 for_each_dip(dip, i) {
944                         for_each_tip(dip, tip, j) {
945                                 ret = write_tip_events(tip);
946                                 if (ret > 0)
947                                         events += ret;
948                                 tips_running += !tip->exited;
949                         }
950                 }
951                 usleep(10);
952         } while (events || tips_running);
953 }
954
955 static void wait_for_threads(void)
956 {
957         /*
958          * for piped or network output, poll and fetch data for writeout.
959          * for files, we just wait around for trace threads to exit
960          */
961         if ((output_name && !strcmp(output_name, "-")) ||
962             ((net_mode == Net_client) && !net_use_sendfile))
963                 get_and_write_events();
964         else {
965                 struct device_information *dip;
966                 struct thread_information *tip;
967                 int i, j, tips_running;
968
969                 do {
970                         tips_running = 0;
971                         usleep(100000);
972
973                         for_each_dip(dip, i)
974                                 for_each_tip(dip, tip, j)
975                                         tips_running += !tip->exited;
976                 } while (tips_running);
977         }
978
979         if (net_mode == Net_client)
980                 net_client_send_close();
981 }
982
/*
 * Build the per-cpu output path into 'dst':
 *   [output_dir or ./][<client-ip>-<connect-time>/ in server mode]
 *   [output_name or buts_name].blktrace.<cpu>
 * Creates the directory part if it does not exist yet.
 * Returns 0 on success, 1 on stat/mkdir failure.
 *
 * NOTE(review): dst is filled with unbounded sprintf/strftime and its
 * size is not passed in; the visible caller hands in a fixed char[128],
 * so long --output/--output-dir values could overflow it.  A size
 * parameter is needed -- verify all callers before changing.
 */
static int fill_ofname(struct device_information *dip,
		       struct thread_information *tip, char *dst,
		       char *buts_name)
{
	struct stat sb;
	int len = 0;

	if (output_dir)
		len = sprintf(dst, "%s/", output_dir);
	else
		len = sprintf(dst, "./");

	if (net_mode == Net_server) {
		struct net_connection *nc = tip->nc;

		/* per-client sub-directory: <ip>-<UTC connect time>/ */
		len += sprintf(dst + len, "%s-", inet_ntoa(nc->ch->cl_in_addr));
		len += strftime(dst + len, 64, "%F-%T/", gmtime(&dip->cl_connect_time));
	}

	/* create the directory portion if it is missing */
	if (stat(dst, &sb) < 0) {
		if (errno != ENOENT) {
			perror("stat");
			return 1;
		}
		if (mkdir(dst, 0755) < 0) {
			perror(dst);
			fprintf(stderr, "Can't make output dir\n");
			return 1;
		}
	}

	if (output_name)
		sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
	else
		sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);

	return 0;
}
1021
1022 static void fill_ops(struct thread_information *tip)
1023 {
1024         /*
1025          * setup ops
1026          */
1027         if (net_mode == Net_client) {
1028                 if (net_use_sendfile) {
1029                         tip->get_subbuf = get_subbuf_sendfile;
1030                         tip->flush_subbuf = NULL;
1031                 } else {
1032                         tip->get_subbuf = get_subbuf;
1033                         tip->flush_subbuf = flush_subbuf_net;
1034                 }
1035         } else {
1036                 if (tip->ofile_mmap)
1037                         tip->get_subbuf = mmap_subbuf;
1038                 else
1039                         tip->get_subbuf = get_subbuf;
1040
1041                 tip->flush_subbuf = flush_subbuf_file;
1042         }
1043                         
1044         if (net_mode == Net_server)
1045                 tip->read_data = read_data_net;
1046         else
1047                 tip->read_data = read_data_file;
1048 }
1049
1050 static int tip_open_output(struct device_information *dip,
1051                            struct thread_information *tip)
1052 {
1053         int pipeline = output_name && !strcmp(output_name, "-");
1054         int mode, vbuf_size;
1055         char op[128];
1056
1057         if (net_mode == Net_client) {
1058                 tip->ofile = NULL;
1059                 tip->ofile_stdout = 0;
1060                 tip->ofile_mmap = 0;
1061                 goto done;
1062         } else if (pipeline) {
1063                 tip->ofile = fdopen(STDOUT_FILENO, "w");
1064                 tip->ofile_stdout = 1;
1065                 tip->ofile_mmap = 0;
1066                 mode = _IOLBF;
1067                 vbuf_size = 512;
1068         } else {
1069                 if (fill_ofname(dip, tip, op, dip->buts_name))
1070                         return 1;
1071                 tip->ofile = fopen(op, "w+");
1072                 tip->ofile_stdout = 0;
1073                 tip->ofile_mmap = 1;
1074                 mode = _IOFBF;
1075                 vbuf_size = OFILE_BUF;
1076         }
1077
1078         if (tip->ofile == NULL) {
1079                 perror(op);
1080                 return 1;
1081         }
1082
1083         tip->ofile_buffer = malloc(vbuf_size);
1084         if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
1085                 perror("setvbuf");
1086                 close_thread(tip);
1087                 return 1;
1088         }
1089
1090 done:
1091         fill_ops(tip);
1092         return 0;
1093 }
1094
1095 static int start_threads(struct device_information *dip)
1096 {
1097         struct thread_information *tip;
1098         int j;
1099
1100         for_each_tip(dip, tip, j) {
1101                 tip->cpu = j;
1102                 tip->device = dip;
1103                 tip->events_processed = 0;
1104                 tip->fd = -1;
1105                 memset(&tip->fifo, 0, sizeof(tip->fifo));
1106                 tip->leftover_ts = NULL;
1107
1108                 if (tip_open_output(dip, tip))
1109                         return 1;
1110
1111                 if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
1112                         perror("pthread_create");
1113                         close_thread(tip);
1114                         return 1;
1115                 }
1116         }
1117
1118         return 0;
1119 }
1120
1121 static void stop_threads(struct device_information *dip)
1122 {
1123         struct thread_information *tip;
1124         unsigned long ret;
1125         int i;
1126
1127         for_each_tip(dip, tip, i) {
1128                 (void) pthread_join(tip->thread, (void *) &ret);
1129                 close_thread(tip);
1130         }
1131 }
1132
1133 static void stop_all_threads(void)
1134 {
1135         struct device_information *dip;
1136         int i;
1137
1138         for_each_dip(dip, i)
1139                 stop_threads(dip);
1140 }
1141
1142 static void stop_all_tracing(void)
1143 {
1144         struct device_information *dip;
1145         int i;
1146
1147         for_each_dip(dip, i)
1148                 stop_trace(dip);
1149 }
1150
1151 static void exit_trace(int status)
1152 {
1153         if (!is_trace_stopped()) {
1154                 trace_stopped = 1;
1155                 stop_all_threads();
1156                 stop_all_tracing();
1157         }
1158
1159         exit(status);
1160 }
1161
1162 static int resize_devices(char *path)
1163 {
1164         int size = (ndevs + 1) * sizeof(struct device_information);
1165
1166         device_information = realloc(device_information, size);
1167         if (!device_information) {
1168                 fprintf(stderr, "Out of memory, device %s (%d)\n", path, size);
1169                 return 1;
1170         }
1171         device_information[ndevs].path = path;
1172         ndevs++;
1173         return 0;
1174 }
1175
1176 static int open_devices(void)
1177 {
1178         struct device_information *dip;
1179         int i;
1180
1181         for_each_dip(dip, i) {
1182                 dip->fd = open(dip->path, O_RDONLY | O_NONBLOCK);
1183                 if (dip->fd < 0) {
1184                         perror(dip->path);
1185                         return 1;
1186                 }
1187         }
1188
1189         return 0;
1190 }
1191
/*
 * Allocate the per-cpu thread array, start kernel-side tracing on every
 * device, then launch the worker threads. On partial failure, anything
 * already started is unwound. Returns 0 on success, 1 on error.
 */
static int start_devices(void)
{
	struct device_information *dip;
	int i, j, size;

	/* one thread_information per cpu, per device */
	size = ncpus * sizeof(struct thread_information);
	thread_information = malloc(size * ndevs);
	if (!thread_information) {
		fprintf(stderr, "Out of memory, threads (%d)\n", size * ndevs);
		return 1;
	}

	for_each_dip(dip, i) {
		if (start_trace(dip)) {
			close(dip->fd);
			fprintf(stderr, "Failed to start trace on %s\n",
				dip->path);
			break;
		}
	}

	/* i < ndevs here means the loop above broke out early */
	if (i != ndevs) {
		/* stop tracing on the devices that did start */
		__for_each_dip(dip, device_information, i, j)
			stop_trace(dip);

		return 1;
	}

	for_each_dip(dip, i) {
		/* each device owns an ncpus-sized slice of the array */
		dip->threads = thread_information + (i * ncpus);
		if (start_threads(dip)) {
			fprintf(stderr, "Failed to start worker threads\n");
			break;
		}
	}

	if (i != ndevs) {
		/* unwind threads for devices 0..i-1, then all tracing */
		__for_each_dip(dip, device_information, i, j)
			stop_threads(dip);
		for_each_dip(dip, i)
			stop_trace(dip);

		return 1;
	}

	return 0;
}
1239
1240 static void show_stats(struct device_information *dips, int ndips, int cpus)
1241 {
1242         struct device_information *dip;
1243         struct thread_information *tip;
1244         unsigned long long events_processed, data_read;
1245         unsigned long total_drops;
1246         int i, j, no_stdout = 0;
1247
1248         if (is_stat_shown())
1249                 return;
1250
1251         if (output_name && !strcmp(output_name, "-"))
1252                 no_stdout = 1;
1253
1254         stat_shown = 1;
1255
1256         total_drops = 0;
1257         __for_each_dip(dip, dips, ndips, i) {
1258                 if (!no_stdout)
1259                         printf("Device: %s\n", dip->path);
1260                 events_processed = 0;
1261                 data_read = 0;
1262                 __for_each_tip(dip, tip, cpus, j) {
1263                         if (!no_stdout)
1264                                 printf("  CPU%3d: %20lu events, %8llu KiB data\n",
1265                                         tip->cpu, tip->events_processed,
1266                                         (tip->data_read + 1023) >> 10);
1267                         events_processed += tip->events_processed;
1268                         data_read += tip->data_read;
1269                 }
1270                 total_drops += dip->drop_count;
1271                 if (!no_stdout)
1272                         printf("  Total:  %20llu events (dropped %lu), %8llu KiB data\n",
1273                                         events_processed, dip->drop_count,
1274                                         (data_read + 1023) >> 10);
1275         }
1276
1277         if (total_drops)
1278                 fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
1279 }
1280
1281 static struct device_information *net_get_dip(struct net_connection *nc,
1282                                               char *buts_name, u32 cl_id)
1283 {
1284         struct device_information *dip, *cl_dip = NULL;
1285         struct cl_host *ch = nc->ch;
1286         int i;
1287
1288         for (i = 0; i < ch->ndevs; i++) {
1289                 dip = &ch->device_information[i];
1290
1291                 if (!strcmp(dip->buts_name, buts_name))
1292                         return dip;
1293
1294                 if (dip->cl_id == cl_id)
1295                         cl_dip = dip;
1296         }
1297
1298         ch->device_information = realloc(ch->device_information, (ch->ndevs + 1) * sizeof(*dip));
1299         dip = &ch->device_information[ch->ndevs];
1300         memset(dip, 0, sizeof(*dip));
1301         dip->fd = -1;
1302         dip->ch = ch;
1303         dip->cl_id = cl_id;
1304         if (cl_dip)
1305                 dip->cl_connect_time = cl_dip->cl_connect_time;
1306         else
1307                 dip->cl_connect_time = nc->connect_time;
1308         strcpy(dip->buts_name, buts_name);
1309         dip->path = strdup(buts_name);
1310         dip->trace_started = 1;
1311         ch->ndevs++;
1312         dip->threads = malloc(nc->ncpus * sizeof(struct thread_information));
1313         memset(dip->threads, 0, nc->ncpus * sizeof(struct thread_information));
1314
1315         /*
1316          * open all files
1317          */
1318         for (i = 0; i < nc->ncpus; i++) {
1319                 struct thread_information *tip = &dip->threads[i];
1320
1321                 tip->cpu = i;
1322                 tip->device = dip;
1323                 tip->fd = -1;
1324                 tip->nc = nc;
1325                 
1326                 if (tip_open_output(dip, tip))
1327                         return NULL;
1328
1329                 tip->nc = NULL;
1330         }
1331
1332         return dip;
1333 }
1334
1335 static struct thread_information *net_get_tip(struct net_connection *nc,
1336                                               struct blktrace_net_hdr *bnh)
1337 {
1338         struct device_information *dip;
1339         struct thread_information *tip;
1340
1341         dip = net_get_dip(nc, bnh->buts_name, bnh->cl_id);
1342         if (!dip->trace_started) {
1343                 fprintf(stderr, "Events for closed devices %s\n", dip->buts_name);
1344                 return NULL;
1345         }
1346
1347         tip = &dip->threads[bnh->cpu];
1348         if (!tip->nc)
1349                 tip->nc = nc;
1350         
1351         return tip;
1352 }
1353
1354 static int net_get_header(struct net_connection *nc,
1355                           struct blktrace_net_hdr *bnh)
1356 {
1357         int fl = fcntl(nc->in_fd, F_GETFL);
1358         int bytes_left, ret;
1359         void *p = bnh;
1360
1361         fcntl(nc->in_fd, F_SETFL, fl | O_NONBLOCK);
1362         bytes_left = sizeof(*bnh);
1363         while (bytes_left && !is_done()) {
1364                 ret = recv(nc->in_fd, p, bytes_left, MSG_WAITALL);
1365                 if (ret < 0) {
1366                         if (errno != EAGAIN) {
1367                                 perror("recv header");
1368                                 return 1;
1369                         }
1370                         usleep(1000);
1371                         continue;
1372                 } else if (!ret) {
1373                         usleep(1000);
1374                         continue;
1375                 } else {
1376                         p += ret;
1377                         bytes_left -= ret;
1378                 }
1379         }
1380         fcntl(nc->in_fd, F_SETFL, fl & ~O_NONBLOCK);
1381         return bytes_left;
1382 }
1383
1384 /*
1385  * finalize a net client: truncate files, show stats, cleanup, etc
1386  */
1387 static void device_done(struct net_connection *nc, struct device_information *dip)
1388 {
1389         struct thread_information *tip;
1390         int i;
1391
1392         __for_each_tip(dip, tip, nc->ncpus, i)
1393                 tip_ftrunc_final(tip);
1394
1395         show_stats(dip, 1, nc->ncpus);
1396
1397         /*
1398          * cleanup for next run
1399          */
1400         __for_each_tip(dip, tip, nc->ncpus, i) {
1401                 if (tip->ofile)
1402                         fclose(tip->ofile);
1403         }
1404
1405         free(dip->threads);
1406         free(dip->path);
1407
1408         close(nc->in_fd);
1409         nc->in_fd = -1;
1410
1411         stat_shown = 0;
1412 }
1413
/*
 * Return non-zero if the two IPv4 addresses are identical.
 */
static inline int in_addr_eq(struct in_addr a, struct in_addr b)
{
	return (a.s_addr == b.s_addr) ? 1 : 0;
}
1418
1419 static void net_add_client_host(struct cl_host *ch)
1420 {
1421         ch->list_next = cl_host_list;
1422         cl_host_list = ch;
1423         cl_hosts++;
1424 }
1425
1426 static void net_remove_client_host(struct cl_host *ch)
1427 {
1428         struct cl_host *p, *c;
1429         
1430         for (p = c = cl_host_list; c; c = c->list_next) {
1431                 if (c == ch) {
1432                         if (p == c)
1433                                 cl_host_list = c->list_next;
1434                         else
1435                                 p->list_next = c->list_next;
1436                         cl_hosts--;
1437                         return;
1438                 }
1439                 p = c;
1440         }
1441 }
1442
1443 static struct cl_host *net_find_client_host(struct in_addr cl_in_addr)
1444 {
1445         struct cl_host *ch = cl_host_list;
1446
1447         while (ch) {
1448                 if (in_addr_eq(ch->cl_in_addr, cl_in_addr))
1449                         return ch;
1450                 ch = ch->list_next;
1451         }
1452
1453         return NULL;
1454 }
1455
1456 static void net_client_host_done(struct cl_host *ch)
1457 {
1458         free(ch->device_information);
1459         free(ch->net_connections);
1460         net_connects -= ch->nconn;
1461         net_remove_client_host(ch);
1462         free(ch);
1463 }
1464
1465 /*
1466  * handle incoming events from a net client
1467  */
1468 static int net_client_data(struct net_connection *nc)
1469 {
1470         struct thread_information *tip;
1471         struct blktrace_net_hdr bnh;
1472
1473         if (net_get_header(nc, &bnh))
1474                 return 1;
1475
1476         if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
1477                 fprintf(stderr, "server: received data is bad\n");
1478                 return 1;
1479         }
1480
1481         if (!data_is_native) {
1482                 bnh.magic = be32_to_cpu(bnh.magic);
1483                 bnh.cpu = be32_to_cpu(bnh.cpu);
1484                 bnh.max_cpus = be32_to_cpu(bnh.max_cpus);
1485                 bnh.len = be32_to_cpu(bnh.len);
1486                 bnh.cl_id = be32_to_cpu(bnh.cl_id);
1487         }
1488
1489         if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
1490                 fprintf(stderr, "server: bad data magic\n");
1491                 return 1;
1492         }
1493
1494         if (nc->ncpus == -1)
1495                 nc->ncpus = bnh.max_cpus;
1496
1497         /*
1498          * len == 0 means that the other end signalled end-of-run
1499          */
1500         if (!bnh.len) {
1501                 /*
1502                  * overload cpu count with dropped events
1503                  */
1504                 struct device_information *dip;
1505
1506                 dip = net_get_dip(nc, bnh.buts_name, bnh.cl_id);
1507                 dip->drop_count = bnh.cpu;
1508                 dip->trace_started = 0;
1509
1510                 printf("server: end of run for %s\n", dip->buts_name);
1511
1512                 device_done(nc, dip);
1513
1514                 if (++nc->ch->ndevs_done == nc->ch->ndevs)
1515                         net_client_host_done(nc->ch);
1516
1517                 return 0;
1518         }
1519
1520         tip = net_get_tip(nc, &bnh);
1521         if (!tip)
1522                 return 1;
1523
1524         if (mmap_subbuf(tip, bnh.len))
1525                 return 1;
1526
1527         return 0;
1528 }
1529
1530 static void net_add_connection(int listen_fd, struct sockaddr_in *addr)
1531 {
1532         socklen_t socklen = sizeof(*addr);
1533         struct net_connection *nc;
1534         struct cl_host *ch;
1535         int in_fd;
1536
1537         in_fd = accept(listen_fd, (struct sockaddr *) addr, &socklen);
1538         if (in_fd < 0) {
1539                 perror("accept");
1540                 return;
1541         }
1542
1543         ch = net_find_client_host(addr->sin_addr);
1544         if (!ch) {
1545                 if (cl_hosts == NET_MAX_CL_HOSTS) {
1546                         fprintf(stderr, "server: no more clients allowed\n");
1547                         return;
1548                 }
1549                 ch = malloc(sizeof(struct cl_host));
1550                 memset(ch, 0, sizeof(*ch));
1551                 ch->cl_in_addr = addr->sin_addr;
1552                 net_add_client_host(ch);
1553
1554                 printf("server: connection from %s\n", inet_ntoa(addr->sin_addr));
1555         }
1556
1557         ch->net_connections = realloc(ch->net_connections, (ch->nconn + 1) * sizeof(*nc));
1558         nc = &ch->net_connections[ch->nconn++];
1559         memset(nc, 0, sizeof(*nc));
1560
1561         time(&nc->connect_time);
1562         nc->ch = ch;
1563         nc->in_fd = in_fd;
1564         nc->ncpus = -1;
1565         net_connects++;
1566 }
1567
1568 /*
1569  * event driven loop, handle new incoming connections and data from
1570  * existing connections
1571  */
1572 static void net_server_handle_connections(int listen_fd,
1573                                           struct sockaddr_in *addr)
1574 {
1575         struct pollfd *pfds = NULL;
1576         struct net_connection **ncs = NULL;
1577         int max_connects = 0;
1578         int i, nconns, events;
1579         struct cl_host *ch;
1580         struct net_connection *nc;
1581         
1582         printf("server: waiting for connections...\n");
1583
1584         while (!is_done()) {
1585                 if (net_connects >= max_connects) {
1586                         pfds = realloc(pfds, (net_connects + 1) * sizeof(*pfds));
1587                         ncs = realloc(ncs, (net_connects + 1) * sizeof(*ncs));
1588                         max_connects = net_connects + 1;
1589                 }
1590                 /*
1591                  * the zero entry is for incoming connections, remaining
1592                  * entries for clients
1593                  */
1594                 pfds[0].fd = listen_fd;
1595                 pfds[0].events = POLLIN;
1596                 nconns = 0;
1597                 for_each_cl_host(ch) {
1598                         for (i = 0; i < ch->nconn; i++) {
1599                                 nc = &ch->net_connections[i];
1600                                 pfds[nconns + 1].fd = nc->in_fd;
1601                                 pfds[nconns + 1].events = POLLIN;
1602                                 ncs[nconns++] = nc;
1603                         }
1604                 }
1605
1606                 events = poll(pfds, 1 + nconns, -1);
1607                 if (events < 0) {
1608                         if (errno == EINTR)
1609                                 continue;
1610
1611                         perror("poll");
1612                         break;
1613                 } else if (!events)
1614                         continue;
1615
1616                 if (pfds[0].revents & POLLIN) {
1617                         net_add_connection(listen_fd, addr);
1618                         events--;
1619                 }
1620
1621                 for (i = 0; events && i < nconns; i++) {
1622                         if (pfds[i + 1].revents & POLLIN) {
1623                                 net_client_data(ncs[i]);
1624                                 events--;
1625                         }
1626                 }
1627         }
1628 }
1629
1630 /*
1631  * Start here when we are in server mode - just fetch data from the network
1632  * and dump to files
1633  */
1634 static int net_server(void)
1635 {
1636         struct sockaddr_in addr;
1637         int fd, opt;
1638
1639         fd = socket(AF_INET, SOCK_STREAM, 0);
1640         if (fd < 0) {
1641                 perror("server: socket");
1642                 return 1;
1643         }
1644
1645         opt = 1;
1646         if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
1647                 perror("setsockopt");
1648                 return 1;
1649         }
1650
1651         memset(&addr, 0, sizeof(addr));
1652         addr.sin_family = AF_INET;
1653         addr.sin_addr.s_addr = htonl(INADDR_ANY);
1654         addr.sin_port = htons(net_port);
1655
1656         if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
1657                 perror("bind");
1658                 return 1;
1659         }
1660
1661         if (listen(fd, 1) < 0) {
1662                 perror("listen");
1663                 return 1;
1664         }
1665
1666         net_server_handle_connections(fd, &addr);
1667         return 0;
1668 }
1669
1670 /*
1671  * Setup outgoing network connection where we will transmit data
1672  */
1673 static int net_setup_client_cpu(int i, struct sockaddr_in *addr)
1674 {
1675         int fd;
1676
1677         fd = socket(AF_INET, SOCK_STREAM, 0);
1678         if (fd < 0) {
1679                 perror("client: socket");
1680                 return 1;
1681         }
1682
1683         if (connect(fd, (struct sockaddr *) addr, sizeof(*addr)) < 0) {
1684                 perror("client: connect");
1685                 return 1;
1686         }
1687
1688         net_out_fd[i] = fd;
1689         return 0;
1690 }
1691
1692 static int net_setup_client(void)
1693 {
1694         struct sockaddr_in addr;
1695         int i;
1696
1697         memset(&addr, 0, sizeof(addr));
1698         addr.sin_family = AF_INET;
1699         addr.sin_port = htons(net_port);
1700
1701         if (inet_aton(hostname, &addr.sin_addr) != 1) {
1702                 struct hostent *hent = gethostbyname(hostname);
1703                 if (!hent) {
1704                         perror("gethostbyname");
1705                         return 1;
1706                 }
1707
1708                 memcpy(&addr.sin_addr, hent->h_addr, 4);
1709                 strcpy(hostname, hent->h_name);
1710         }
1711
1712         printf("blktrace: connecting to %s\n", hostname);
1713
1714         net_out_fd = malloc(ncpus * sizeof(*net_out_fd));
1715         for (i = 0; i < ncpus; i++) {
1716                 if (net_setup_client_cpu(i, &addr))
1717                         return 1;
1718         }
1719
1720         printf("blktrace: connected!\n");
1721         
1722         return 0;
1723 }
1724
/* usage text; the summary line advertises -V (version), matching S_OPTS */
static char usage_str[] = \
	"-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n" \
	"[ -a action ] [ -A action mask ] [ -V ]\n\n" \
	"\t-d Use specified device. May also be given last after options\n" \
	"\t-r Path to mounted debugfs, defaults to /debug\n" \
	"\t-o File(s) to send output to\n" \
	"\t-D Directory to prepend to output file names\n" \
	"\t-k Kill a running trace\n" \
	"\t-w Stop after defined time, in seconds\n" \
	"\t-a Only trace specified actions. See documentation\n" \
	"\t-A Give trace mask as a single value. See documentation\n" \
	"\t-b Sub buffer size in KiB\n" \
	"\t-n Number of sub buffers\n" \
	"\t-l Run in network listen mode (blktrace server)\n" \
	"\t-h Run in network client mode, connecting to the given host\n" \
	"\t-p Network port to use (default 8462)\n" \
	"\t-s Make the network client NOT use sendfile() to transfer data\n" \
	"\t-V Print program version info\n\n";
1743
1744 static void show_usage(char *program)
1745 {
1746         fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
1747 }
1748
/*
 * Parse options, validate the debugfs mount, then run as net server,
 * kill an existing trace, or start tracing the given devices until
 * interrupted or the stop watch fires.
 */
int main(int argc, char *argv[])
{
	static char default_debugfs_path[] = "/debug";
	struct statfs st;
	int i, c;
	int stop_watch = 0;
	int act_mask_tmp = 0;

	while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) >= 0) {
		switch (c) {
		case 'a':
			/* -a: accumulate named action masks */
			i = find_mask_map(optarg);
			if (i < 0) {
				fprintf(stderr,"Invalid action mask %s\n",
					optarg);
				return 1;
			}
			act_mask_tmp |= i;
			break;

		case 'A':
			/* -A: set the whole mask from one hex value */
			if ((sscanf(optarg, "%x", &i) != 1) || 
							!valid_act_opt(i)) {
				fprintf(stderr,
					"Invalid set action mask %s/0x%x\n",
					optarg, i);
				return 1;
			}
			act_mask_tmp = i;
			break;

		case 'd':
			if (resize_devices(optarg) != 0)
				return 1;
			break;

		case 'r':
			debugfs_path = optarg;
			break;

		case 'o':
			output_name = optarg;
			break;
		case 'k':
			kill_running_trace = 1;
			break;
		case 'w':
			stop_watch = atoi(optarg);
			if (stop_watch <= 0) {
				fprintf(stderr,
					"Invalid stopwatch value (%d secs)\n",
					stop_watch);
				return 1;
			}
			break;
		case 'V':
			printf("%s version %s\n", argv[0], blktrace_version);
			return 0;
		case 'b':
			/* -b: sub buffer size, given in KiB, capped at 16 MiB */
			buf_size = strtoul(optarg, NULL, 10);
			if (buf_size <= 0 || buf_size > 16*1024) {
				fprintf(stderr,
					"Invalid buffer size (%lu)\n",buf_size);
				return 1;
			}
			buf_size <<= 10;
			break;
		case 'n':
			buf_nr = strtoul(optarg, NULL, 10);
			if (buf_nr <= 0) {
				fprintf(stderr,
					"Invalid buffer nr (%lu)\n", buf_nr);
				return 1;
			}
			break;
		case 'D':
			output_dir = optarg;
			break;
		case 'h':
			net_mode = Net_client;
			strcpy(hostname, optarg);
			break;
		case 'l':
			net_mode = Net_server;
			break;
		case 'p':
			net_port = atoi(optarg);
			break;
		case 's':
			net_use_sendfile = 0;
			break;
		default:
			show_usage(argv[0]);
			return 1;
		}
	}

	setlocale(LC_NUMERIC, "en_US");

	page_size = getpagesize();

	/* server mode needs no local devices; hand off immediately */
	if (net_mode == Net_server)
		return net_server();

	/* remaining non-option arguments are also devices */
	while (optind < argc) {
		if (resize_devices(argv[optind++]) != 0)
			return 1;
	}

	if (ndevs == 0) {
		show_usage(argv[0]);
		return 1;
	}

	if (act_mask_tmp != 0)
		act_mask = act_mask_tmp;

	if (!debugfs_path)
		debugfs_path = default_debugfs_path;

	/* verify that debugfs is actually mounted where we expect it */
	if (statfs(debugfs_path, &st) < 0) {
		perror("statfs");
		fprintf(stderr,"%s does not appear to be a valid path\n",
			debugfs_path);
		return 1;
	} else if (st.f_type != (long) DEBUGFS_TYPE) {
		fprintf(stderr,"%s does not appear to be a debug filesystem\n",
			debugfs_path);
		return 1;
	}

	if (open_devices() != 0)
		return 1;

	/* -k: stop any running trace on the devices and exit */
	if (kill_running_trace) {
		stop_all_traces();
		return 0;
	}

	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	if (ncpus < 0) {
		fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
		return 1;
	}

	signal(SIGINT, handle_sigint);
	signal(SIGHUP, handle_sigint);
	signal(SIGTERM, handle_sigint);
	signal(SIGALRM, handle_sigint);

	if (net_mode == Net_client && net_setup_client())
		return 1;

	if (start_devices() != 0)
		return 1;

	atexit(stop_all_tracing);

	/* -w: SIGALRM ends the run after stop_watch seconds */
	if (stop_watch)
		alarm(stop_watch);

	wait_for_threads();

	if (!is_trace_stopped()) {
		trace_stopped = 1;
		stop_all_threads();
		stop_all_traces();
	}

	show_stats(device_information, ndevs, ncpus);

	return 0;
}
1922