#include <sys/param.h>
#include <sys/statfs.h>
#include <sys/poll.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
#include <stdio.h>
#include <stdlib.h>
#include <sched.h>
#include <ctype.h>
#include <getopt.h>
#include <errno.h>
-#include <assert.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <sys/sendfile.h>
#include "blktrace.h"
-#include "list.h"
+#include "barrier.h"
static char blktrace_version[] = "0.99";
#define RELAYFS_TYPE 0xF0B4A981
-#define RING_INIT_NR (2)
-#define RING_MAX_NR (16UL)
-
-#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:"
+#define S_OPTS "d:a:A:r:o:kw:Vb:n:D:lh:p:s"
static struct option l_opts[] = {
{
.name = "dev",
.flag = NULL,
.val = 'D'
},
+ {
+ .name = "listen",
+ .has_arg = no_argument,
+ .flag = NULL,
+ .val = 'l'
+ },
+ {
+ .name = "host",
+ .has_arg = required_argument,
+ .flag = NULL,
+ .val = 'h'
+ },
+ {
+ .name = "port",
+ .has_arg = required_argument,
+ .flag = NULL,
+ .val = 'p'
+ },
+ {
+ .name = "sendfile",
+ .has_arg = no_argument,
+ .flag = NULL,
+ .val = 's'
+ },
{
.name = NULL,
}
};
struct tip_subbuf {
- struct list_head list;
void *buf;
unsigned int len;
unsigned int max_len;
};
+#define FIFO_SIZE (1024) /* should be plenty big! */
+#define CL_SIZE (128) /* cache line, any bigger? */
+
+struct tip_subbuf_fifo {
+ int tail __attribute__((aligned(CL_SIZE)));
+ int head __attribute__((aligned(CL_SIZE)));
+ struct tip_subbuf *q[FIFO_SIZE];
+};
+
struct thread_information {
int cpu;
pthread_t thread;
int fd;
void *fd_buf;
- unsigned long fd_off;
- unsigned long fd_size;
- unsigned long fd_max_size;
char fn[MAXPATHLEN + 64];
+ struct in_addr cl_in_addr;
+
FILE *ofile;
char *ofile_buffer;
+ off_t ofile_offset;
int ofile_stdout;
+ int ofile_mmap;
+ volatile int sendfile_pending;
+
+ int (*get_subbuf)(struct thread_information *, unsigned int);
+ int (*flush_subbuf)(struct thread_information *, struct tip_subbuf *);
+ int (*read_data)(struct thread_information *, void *, unsigned int);
unsigned long events_processed;
+ unsigned long long data_read;
+ unsigned long long data_queued;
struct device_information *device;
int exited;
- pthread_mutex_t lock;
- struct list_head subbuf_list;
+ /*
+ * piped fifo buffers
+ */
+ struct tip_subbuf_fifo fifo;
struct tip_subbuf *leftover_ts;
- unsigned int leftover_ts_offset;
+
+ /*
+ * mmap controlled output files
+ */
+ unsigned long long fs_size;
+ unsigned long long fs_max_size;
+ unsigned long fs_off;
+ void *fs_buf;
+ unsigned long fs_buf_len;
};
struct device_information {
static int kill_running_trace;
static unsigned long buf_size = BUF_SIZE;
static unsigned long buf_nr = BUF_NR;
+static unsigned int page_size;
#define is_done() (*(volatile int *)(&done))
static volatile int done;
#define is_stat_shown() (*(volatile int *)(&stat_shown))
static volatile int stat_shown;
+int data_is_native = -1;
+
static void exit_trace(int status);
#define dip_tracing(dip) (*(volatile int *)(&(dip)->trace_started))
#define for_each_tip(__d, __t, __j) \
for (__j = 0, __t = (__d)->threads; __j < ncpus; __j++, __t++)
+/*
+ * networking stuff follows. we include a magic number so we know whether
+ * to endianness convert or not
+ */
+struct blktrace_net_hdr {
+ u32 magic; /* same as trace magic */
+ char buts_name[32]; /* trace name */
+ u32 cpu; /* for which cpu */
+ u32 max_cpus;
+ u32 len; /* length of following trace data */
+};
+
+#define TRACE_NET_PORT (8462)
+
+enum {
+ Net_none = 0,
+ Net_server,
+ Net_client,
+};
+
+/*
+ * network cmd line params
+ */
+static char hostname[MAXHOSTNAMELEN];
+static int net_port = TRACE_NET_PORT;
+static int net_mode = 0;
+static int net_use_sendfile;
+
+static int net_in_fd = -1;
+static int net_out_fd = -1;
+
+static void handle_sigint(__attribute__((__unused__)) int sig)
+{
+ struct device_information *dip;
+ int i;
+
+ /*
+ * stop trace so we can reap currently produced data
+ */
+ for_each_dip(dip, i) {
+ if (dip->fd == -1)
+ continue;
+ if (ioctl(dip->fd, BLKTRACESTOP) < 0)
+ perror("BLKTRACESTOP");
+ }
+
+ done = 1;
+}
+
static int get_dropped_count(const char *buts_name)
{
int fd;
buts.buf_nr = buf_nr;
buts.act_mask = act_mask;
- if (ioctl(dip->fd, BLKSTARTTRACE, &buts) < 0) {
- perror("BLKSTARTTRACE");
+ if (ioctl(dip->fd, BLKTRACESETUP, &buts) < 0) {
+ perror("BLKTRACESETUP");
+ return 1;
+ }
+
+ if (ioctl(dip->fd, BLKTRACESTART) < 0) {
+ perror("BLKTRACESTART");
return 1;
}
if (dip_tracing(dip) || kill_running_trace) {
dip_set_tracing(dip, 0);
- if (ioctl(dip->fd, BLKSTOPTRACE) < 0)
- perror("BLKSTOPTRACE");
+ /*
+ * should be stopped, just don't complain if it isn't
+ */
+ ioctl(dip->fd, BLKTRACESTOP);
+
+ if (ioctl(dip->fd, BLKTRACETEARDOWN) < 0)
+ perror("BLKTRACETEARDOWN");
close(dip->fd);
dip->fd = -1;
struct pollfd pfd = { .fd = tip->fd, .events = POLLIN };
do {
- poll(&pfd, 1, 10);
+ poll(&pfd, 1, 100);
if (pfd.revents & POLLIN)
break;
if (tip->ofile_stdout)
} while (!is_done());
}
-static int read_data(struct thread_information *tip, void *buf, int len)
+static int read_data_file(struct thread_information *tip, void *buf,
+ unsigned int len)
{
int ret = 0;
} while (!is_done());
return ret;
+
}
-static inline void tip_fd_unlock(struct thread_information *tip)
+static int read_data_net(struct thread_information *tip, void *buf,
+ unsigned int len)
{
- pthread_mutex_unlock(&tip->lock);
+ unsigned int bytes_left = len;
+ int ret = 0;
+
+ do {
+ ret = recv(net_in_fd, buf, bytes_left, MSG_WAITALL);
+
+ if (!ret)
+ continue;
+ else if (ret < 0) {
+ if (errno != EAGAIN) {
+ perror(tip->fn);
+ fprintf(stderr, "server: failed read\n");
+ return 0;
+ }
+ continue;
+ } else {
+ buf += ret;
+ bytes_left -= ret;
+ }
+ } while (!is_done() && bytes_left);
+
+ return len - bytes_left;
}
-static inline void tip_fd_lock(struct thread_information *tip)
+static int read_data(struct thread_information *tip, void *buf,
+ unsigned int len)
{
- pthread_mutex_lock(&tip->lock);
+ return tip->read_data(tip, buf, len);
}
-static int get_subbuf(struct thread_information *tip)
+static inline struct tip_subbuf *
+subbuf_fifo_dequeue(struct thread_information *tip)
{
- struct tip_subbuf *ts;
- int ts_size, ret;
+ const int head = tip->fifo.head;
+ const int next = (head + 1) & (FIFO_SIZE - 1);
+
+ if (head != tip->fifo.tail) {
+ struct tip_subbuf *ts = tip->fifo.q[head];
+
+ store_barrier();
+ tip->fifo.head = next;
+ return ts;
+ }
+
+ return NULL;
+}
+
+static inline int subbuf_fifo_queue(struct thread_information *tip,
+ struct tip_subbuf *ts)
+{
+ const int tail = tip->fifo.tail;
+ const int next = (tail + 1) & (FIFO_SIZE - 1);
+
+ if (next != tip->fifo.head) {
+ tip->fifo.q[tail] = ts;
+ store_barrier();
+ tip->fifo.tail = next;
+ return 0;
+ }
+
+ fprintf(stderr, "fifo too small!\n");
+ return 1;
+}
+
+/*
+ * For file output, truncate and mmap the file appropriately
+ */
+static int mmap_subbuf(struct thread_information *tip, unsigned int maxlen)
+{
+ int ofd = fileno(tip->ofile);
+ int ret;
/*
- * live tracing should get a lower count to make it more "realtime"
+ * extend file, if we have to. use chunks of 16 subbuffers.
*/
- if (tip->ofile_stdout)
- ts_size = 1024;
- else
- ts_size = buf_size;
+ if (tip->fs_off + buf_size > tip->fs_buf_len) {
+ if (tip->fs_buf) {
+ munlock(tip->fs_buf, tip->fs_buf_len);
+ munmap(tip->fs_buf, tip->fs_buf_len);
+ tip->fs_buf = NULL;
+ }
- ts = malloc(sizeof(*ts));
- ts->buf = malloc(ts_size);
- ts->max_len = ts_size;
+ tip->fs_off = tip->fs_size & (page_size - 1);
+ tip->fs_buf_len = (16 * buf_size) - tip->fs_off;
+ tip->fs_max_size += tip->fs_buf_len;
+
+ if (ftruncate(ofd, tip->fs_max_size) < 0) {
+ perror("ftruncate");
+ return -1;
+ }
- ret = read_data(tip, ts->buf, ts_size);
+ tip->fs_buf = mmap(NULL, tip->fs_buf_len, PROT_WRITE,
+ MAP_SHARED, ofd, tip->fs_size - tip->fs_off);
+ if (tip->fs_buf == MAP_FAILED) {
+ perror("mmap");
+ return -1;
+ }
+ mlock(tip->fs_buf, tip->fs_buf_len);
+ }
+
+ ret = read_data(tip, tip->fs_buf + tip->fs_off, maxlen);
+ if (ret >= 0) {
+ tip->data_read += ret;
+ tip->fs_size += ret;
+ tip->fs_off += ret;
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * Use the copy approach for pipes and network
+ */
+static int get_subbuf(struct thread_information *tip, unsigned int maxlen)
+{
+ struct tip_subbuf *ts = malloc(sizeof(*ts));
+ int ret;
+
+ ts->buf = malloc(buf_size);
+ ts->max_len = maxlen;
+
+ ret = read_data(tip, ts->buf, ts->max_len);
if (ret > 0) {
ts->len = ret;
- tip_fd_lock(tip);
- list_add_tail(&ts->list, &tip->subbuf_list);
- tip_fd_unlock(tip);
+ tip->data_read += ret;
+ if (subbuf_fifo_queue(tip, ts))
+ return -1;
+ }
+
+ return ret;
+}
+
+static int get_subbuf_sendfile(struct thread_information *tip,
+ unsigned int maxlen)
+{
+ struct tip_subbuf *ts;
+ struct stat sb;
+ unsigned int ready;
+
+ wait_for_data(tip);
+
+ /*
+ * hack to get last data out, we can't use sendfile for that
+ */
+ if (is_done())
+ return get_subbuf(tip, maxlen);
+
+ if (tip->sendfile_pending) {
+ usleep(100);
return 0;
}
- free(ts->buf);
- free(ts);
- return -1;
+ if (fstat(tip->fd, &sb) < 0) {
+ perror("trace stat");
+ return -1;
+ }
+ ready = sb.st_size - tip->data_queued;
+ if (!ready)
+ return 0;
+
+ ts = malloc(sizeof(*ts));
+ ts->buf = NULL;
+ ts->max_len = 0;
+ ts->len = ready;
+ tip->data_queued += ready;
+
+ if (subbuf_fifo_queue(tip, ts))
+ return -1;
+
+ tip->sendfile_pending++;
+ return ready;
}
static void close_thread(struct thread_information *tip)
tip->fd_buf = NULL;
}
+static void tip_ftrunc_final(struct thread_information *tip)
+{
+ /*
+ * truncate to right size and cleanup mmap
+ */
+ if (tip->ofile_mmap) {
+ int ofd = fileno(tip->ofile);
+
+ if (tip->fs_buf)
+ munmap(tip->fs_buf, tip->fs_buf_len);
+
+ ftruncate(ofd, tip->fs_size);
+ }
+}
+
static void *thread_main(void *arg)
{
struct thread_information *tip = arg;
exit_trace(1);
}
- for (;;) {
- if (get_subbuf(tip))
+ while (!is_done()) {
+ if (tip->get_subbuf(tip, buf_size) < 0)
break;
}
+ /*
+ * trace is stopped, pull data until we get a short read
+ */
+ while (tip->get_subbuf(tip, buf_size) > 0)
+ ;
+
+ tip_ftrunc_final(tip);
tip->exited = 1;
return NULL;
}
-static int write_data(struct thread_information *tip,
- void *buf, unsigned int buf_len)
+static int write_data_net(int fd, void *buf, unsigned int buf_len)
{
+ unsigned int bytes_left = buf_len;
int ret;
+ while (bytes_left) {
+ ret = send(fd, buf, bytes_left, 0);
+ if (ret < 0) {
+ perror("send");
+ return 1;
+ }
+
+ buf += ret;
+ bytes_left -= ret;
+ }
+
+ return 0;
+}
+
+static int net_send_header(struct thread_information *tip, unsigned int len)
+{
+ struct blktrace_net_hdr hdr;
+
+ hdr.magic = BLK_IO_TRACE_MAGIC;
+ strcpy(hdr.buts_name, tip->device->buts_name);
+ hdr.cpu = tip->cpu;
+ hdr.max_cpus = ncpus;
+ hdr.len = len;
+
+ return write_data_net(net_out_fd, &hdr, sizeof(hdr));
+}
+
+/*
+ * send header with 0 length to signal end-of-run
+ */
+static void net_client_send_close(void)
+{
+ struct blktrace_net_hdr hdr;
+
+ hdr.magic = BLK_IO_TRACE_MAGIC;
+ hdr.cpu = 0;
+ hdr.max_cpus = ncpus;
+ hdr.len = 0;
+
+ write_data_net(net_out_fd, &hdr, sizeof(hdr));
+}
+
+static int flush_subbuf_net(struct thread_information *tip,
+ struct tip_subbuf *ts)
+{
+ if (net_send_header(tip, ts->len))
+ return 1;
+ if (write_data_net(net_out_fd, ts->buf, ts->len))
+ return 1;
+
+ free(ts->buf);
+ free(ts);
+ return 0;
+}
+
+static int net_sendfile(struct thread_information *tip, struct tip_subbuf *ts)
+{
+ int ret = sendfile(net_out_fd, tip->fd, NULL, ts->len);
+
+ if (ret < 0) {
+ perror("sendfile");
+ return 1;
+ } else if (ret < (int) ts->len) {
+ fprintf(stderr, "short sendfile send (%d of %d)\n", ret, ts->len);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int flush_subbuf_sendfile(struct thread_information *tip,
+ struct tip_subbuf *ts)
+{
+ int ret = 1;
+
+ /*
+ * currently we cannot use sendfile() on the last bytes read, as they
+ * may not be a full subbuffer. get_subbuf_sendfile() falls back to
+ * the read approach for those, so use send() to ship them out
+ */
+ if (ts->buf)
+ return flush_subbuf_net(tip, ts);
+
+ if (net_send_header(tip, ts->len))
+ goto err;
+ if (net_sendfile(tip, ts))
+ goto err;
+
+ tip->data_read += ts->len;
+ tip->ofile_offset += buf_size;
+ ret = 0;
+err:
+ tip->sendfile_pending--;
+ free(ts);
+ return ret;
+}
+
+static int write_data(struct thread_information *tip, void *buf,
+ unsigned int buf_len)
+{
+ int ret;
+
+ if (!buf_len)
+ return 0;
+
while (1) {
ret = fwrite(buf, buf_len, 1, tip->ofile);
if (ret == 1)
return 0;
}
-static int flush_subbuf(struct thread_information *tip, struct tip_subbuf *ts)
+static int flush_subbuf_file(struct thread_information *tip,
+ struct tip_subbuf *ts)
{
unsigned int offset = 0;
struct blk_io_trace *t;
if (tip->leftover_ts) {
struct tip_subbuf *prev_ts = tip->leftover_ts;
- offset = tip->leftover_ts_offset;
- if (offset + prev_ts->len + ts->len > prev_ts->max_len) {
+ if (prev_ts->len + ts->len > prev_ts->max_len) {
prev_ts->max_len += ts->len;
prev_ts->buf = realloc(prev_ts->buf, prev_ts->max_len);
}
- memcpy(prev_ts->buf + offset + ts->len, ts->buf, ts->len);
+ memcpy(prev_ts->buf + prev_ts->len, ts->buf, ts->len);
prev_ts->len += ts->len;
free(ts->buf);
while (offset + sizeof(*t) <= ts->len) {
t = ts->buf + offset;
- if (verify_trace(t))
+ if (verify_trace(t)) {
+ write_data(tip, ts->buf, offset);
return -1;
+ }
pdu_len = t->pdu_len;
if (offset + sizeof(*t) + pdu_len > ts->len)
break;
- trace_to_be(t);
-
- if (write_data(tip, t, sizeof(*t) + pdu_len))
- return -1;
-
offset += sizeof(*t) + pdu_len;
tip->events_processed++;
+ tip->data_read += sizeof(*t) + pdu_len;
events++;
}
+ if (write_data(tip, ts->buf, offset))
+ return -1;
+
/*
* leftover bytes, save them for next time
*/
if (offset != ts->len) {
tip->leftover_ts = ts;
- tip->leftover_ts_offset = offset;
+ ts->len -= offset;
+ memmove(ts->buf, ts->buf + offset, ts->len);
} else {
free(ts->buf);
free(ts);
static int write_tip_events(struct thread_information *tip)
{
- struct tip_subbuf *ts = NULL;
-
- tip_fd_lock(tip);
- if (!list_empty(&tip->subbuf_list)) {
- ts = list_entry(tip->subbuf_list.next, struct tip_subbuf, list);
- list_del(&ts->list);
- }
- tip_fd_unlock(tip);
+ struct tip_subbuf *ts = subbuf_fifo_dequeue(tip);
if (ts)
- return flush_subbuf(tip, ts);
+ return tip->flush_subbuf(tip, ts);
return 0;
}
{
struct device_information *dip;
struct thread_information *tip;
- int i, j, events, ret, all_exited;
+ int i, j, events, ret, tips_running;
while (!is_done()) {
events = 0;
*/
do {
events = 0;
- all_exited = 0;
+ tips_running = 0;
for_each_dip(dip, i) {
for_each_tip(dip, tip, j) {
ret = write_tip_events(tip);
if (ret > 0)
events += ret;
- all_exited += !tip->exited;
+ tips_running += !tip->exited;
}
}
usleep(10);
- } while (events || !all_exited);
+ } while (events || tips_running);
+}
+
+static void wait_for_threads(void)
+{
+ /*
+ * for piped or network output, poll and fetch data for writeout.
+ * for files, we just wait around for trace threads to exit
+ */
+ if ((output_name && !strcmp(output_name, "-")) ||
+ net_mode == Net_client)
+ get_and_write_events();
+ else {
+ struct device_information *dip;
+ struct thread_information *tip;
+ int i, j, tips_running;
+
+ do {
+ tips_running = 0;
+ usleep(1000);
+
+ for_each_dip(dip, i)
+ for_each_tip(dip, tip, j)
+ tips_running += !tip->exited;
+ } while (tips_running);
+ }
+
+ if (net_mode == Net_client)
+ net_client_send_close();
+}
+
+static int fill_ofname(struct thread_information *tip, char *dst,
+ char *buts_name)
+{
+ struct stat sb;
+ int len = 0;
+ time_t t;
+
+ if (output_dir)
+ len = sprintf(dst, "%s/", output_dir);
+
+ if (net_mode == Net_server) {
+ len += sprintf(dst + len, "%s-", inet_ntoa(tip->cl_in_addr));
+ time(&t);
+ len += strftime(dst + len, 64, "%F-%T/", gmtime(&t));
+ }
+
+ if (stat(dst, &sb) < 0) {
+ if (errno != ENOENT) {
+ perror("stat");
+ return 1;
+ }
+ if (mkdir(dst, 0755) < 0) {
+ perror(dst);
+ fprintf(stderr, "Can't make output dir\n");
+ return 1;
+ }
+ }
+
+ if (output_name)
+ sprintf(dst + len, "%s.blktrace.%d", output_name, tip->cpu);
+ else
+ sprintf(dst + len, "%s.blktrace.%d", buts_name, tip->cpu);
+
+ return 0;
+}
+
+static void fill_ops(struct thread_information *tip)
+{
+ /*
+ * setup ops
+ */
+ if (net_mode == Net_client) {
+ if (net_use_sendfile) {
+ tip->get_subbuf = get_subbuf_sendfile;
+ tip->flush_subbuf = flush_subbuf_sendfile;
+ } else {
+ tip->get_subbuf = get_subbuf;
+ tip->flush_subbuf = flush_subbuf_net;
+ }
+ } else {
+ if (tip->ofile_mmap)
+ tip->get_subbuf = mmap_subbuf;
+ else
+ tip->get_subbuf = get_subbuf;
+
+ tip->flush_subbuf = flush_subbuf_file;
+ }
+
+ if (net_mode == Net_server)
+ tip->read_data = read_data_net;
+ else
+ tip->read_data = read_data_file;
+}
+
+static int tip_open_output(struct device_information *dip,
+ struct thread_information *tip)
+{
+ int pipeline = output_name && !strcmp(output_name, "-");
+ int mode, vbuf_size;
+ char op[128];
+
+ if (net_mode == Net_client) {
+ tip->ofile = NULL;
+ tip->ofile_stdout = 0;
+ tip->ofile_mmap = 0;
+ goto done;
+ } else if (pipeline) {
+ tip->ofile = fdopen(STDOUT_FILENO, "w");
+ tip->ofile_stdout = 1;
+ tip->ofile_mmap = 0;
+ mode = _IOLBF;
+ vbuf_size = 512;
+ } else {
+ if (fill_ofname(tip, op, dip->buts_name))
+ return 1;
+ tip->ofile = fopen(op, "w+");
+ tip->ofile_stdout = 0;
+ tip->ofile_mmap = 1;
+ mode = _IOFBF;
+ vbuf_size = OFILE_BUF;
+ }
+
+ if (tip->ofile == NULL) {
+ perror(op);
+ return 1;
+ }
+
+ tip->ofile_buffer = malloc(vbuf_size);
+ if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
+ perror("setvbuf");
+ close_thread(tip);
+ return 1;
+ }
+
+done:
+ fill_ops(tip);
+ return 0;
}
static int start_threads(struct device_information *dip)
{
struct thread_information *tip;
- char op[64];
- int j, pipeline = output_name && !strcmp(output_name, "-");
- int len, mode, vbuf_size;
+ int j;
for_each_tip(dip, tip, j) {
tip->cpu = j;
tip->device = dip;
tip->events_processed = 0;
- INIT_LIST_HEAD(&tip->subbuf_list);
+ tip->fd = -1;
+ memset(&tip->fifo, 0, sizeof(tip->fifo));
tip->leftover_ts = NULL;
- tip->leftover_ts_offset = 0;
-
- if (pipeline) {
- tip->ofile = fdopen(STDOUT_FILENO, "w");
- tip->ofile_stdout = 1;
- mode = _IOLBF;
- vbuf_size = 512;
- } else {
- len = 0;
-
- if (output_dir)
- len = sprintf(op, "%s/", output_dir);
- if (output_name) {
- sprintf(op + len, "%s.blktrace.%d", output_name,
- tip->cpu);
- } else {
- sprintf(op + len, "%s.blktrace.%d",
- dip->buts_name, tip->cpu);
- }
- tip->ofile = fopen(op, "w");
- tip->ofile_stdout = 0;
- mode = _IOFBF;
- vbuf_size = OFILE_BUF;
- }
-
- if (tip->ofile == NULL) {
- perror(op);
+ if (tip_open_output(dip, tip))
return 1;
- }
-
- tip->ofile_buffer = malloc(vbuf_size);
- if (setvbuf(tip->ofile, tip->ofile_buffer, mode, vbuf_size)) {
- perror("setvbuf");
- close_thread(tip);
- return 1;
- }
if (pthread_create(&tip->thread, NULL, thread_main, tip)) {
perror("pthread_create");
{
struct device_information *dip;
struct thread_information *tip;
- unsigned long long events_processed;
+ unsigned long long events_processed, data_read;
unsigned long total_drops;
int i, j, no_stdout = 0;
if (!no_stdout)
printf("Device: %s\n", dip->path);
events_processed = 0;
+ data_read = 0;
for_each_tip(dip, tip, j) {
if (!no_stdout)
- printf(" CPU%3d: %20ld events\n",
- tip->cpu, tip->events_processed);
+ printf(" CPU%3d: %20lu events, %8llu KiB data\n",
+ tip->cpu, tip->events_processed,
+ (tip->data_read + 1023) >> 10);
events_processed += tip->events_processed;
+ data_read += tip->data_read;
}
total_drops += dip->drop_count;
if (!no_stdout)
- printf(" Total: %20lld events (dropped %lu)\n",
- events_processed, dip->drop_count);
+ printf(" Total: %20llu events (dropped %lu), %8llu KiB data\n",
+ events_processed, dip->drop_count,
+ (data_read + 1023) >> 10);
}
if (total_drops)
fprintf(stderr, "You have dropped events, consider using a larger buffer size (-b)\n");
}
+static struct device_information *net_get_dip(char *buts_name,
+ struct in_addr *cl_in_addr)
+{
+ struct device_information *dip;
+ int i;
+
+ for (i = 0; i < ndevs; i++) {
+ dip = &device_information[i];
+
+ if (!strcmp(dip->buts_name, buts_name))
+ return dip;
+ }
+
+ device_information = realloc(device_information, (ndevs + 1) * sizeof(*dip));
+ dip = &device_information[ndevs];
+ memset(dip, 0, sizeof(*dip));
+ dip->fd = -1;
+ strcpy(dip->buts_name, buts_name);
+ dip->path = strdup(buts_name);
+ ndevs++;
+ dip->threads = malloc(ncpus * sizeof(struct thread_information));
+ memset(dip->threads, 0, ncpus * sizeof(struct thread_information));
+
+ /*
+ * open all files
+ */
+ for (i = 0; i < ncpus; i++) {
+ struct thread_information *tip = &dip->threads[i];
+
+ tip->cpu = i;
+ tip->device = dip;
+ tip->fd = -1;
+ tip->cl_in_addr = *cl_in_addr;
+
+ if (tip_open_output(dip, tip))
+ return NULL;
+ }
+
+ return dip;
+}
+
+static struct thread_information *net_get_tip(struct blktrace_net_hdr *bnh,
+ struct in_addr *cl_in_addr)
+{
+ struct device_information *dip;
+
+ ncpus = bnh->max_cpus;
+ dip = net_get_dip(bnh->buts_name, cl_in_addr);
+ return &dip->threads[bnh->cpu];
+}
+
+static int net_get_header(struct blktrace_net_hdr *bnh)
+{
+ int fl = fcntl(net_in_fd, F_GETFL);
+ int bytes_left, ret;
+ void *p = bnh;
+
+ fcntl(net_in_fd, F_SETFL, fl | O_NONBLOCK);
+ bytes_left = sizeof(*bnh);
+ while (bytes_left && !is_done()) {
+ ret = recv(net_in_fd, p, bytes_left, MSG_WAITALL);
+ if (ret < 0) {
+ if (errno != EAGAIN) {
+ perror("recv header");
+ return 1;
+ }
+ usleep(100);
+ continue;
+ } else if (!ret) {
+ usleep(100);
+ continue;
+ } else {
+ p += ret;
+ bytes_left -= ret;
+ }
+ }
+ fcntl(net_in_fd, F_SETFL, fl & ~O_NONBLOCK);
+ return bytes_left;
+}
+
+static int net_server_loop(struct in_addr *cl_in_addr)
+{
+ struct thread_information *tip;
+ struct blktrace_net_hdr bnh;
+
+ if (net_get_header(&bnh))
+ return 1;
+
+ if (data_is_native == -1 && check_data_endianness(bnh.magic)) {
+ fprintf(stderr, "server: received data is bad\n");
+ return 1;
+ }
+
+ if (!data_is_native) {
+ bnh.magic = be32_to_cpu(bnh.magic);
+ bnh.cpu = be32_to_cpu(bnh.cpu);
+ bnh.len = be32_to_cpu(bnh.len);
+ }
+
+ if ((bnh.magic & 0xffffff00) != BLK_IO_TRACE_MAGIC) {
+ fprintf(stderr, "server: bad data magic\n");
+ return 1;
+ }
+
+ /*
+ * len == 0 means that the other end signalled end-of-run
+ */
+ if (!bnh.len) {
+ fprintf(stderr, "server: end of run\n");
+ return 1;
+ }
+
+ tip = net_get_tip(&bnh, cl_in_addr);
+ if (!tip)
+ return 1;
+
+ if (mmap_subbuf(tip, bnh.len))
+ return 1;
+
+ return 0;
+}
+
+static int get_connection(int fd, struct sockaddr_in *addr)
+{
+ struct pollfd pfd = { .fd = fd, .events = POLLIN };
+ socklen_t socklen;
+
+ printf("blktrace: waiting for incoming connection...\n");
+
+ if (poll(&pfd, 1, -1) < 0) {
+ perror("poll for connection");
+ return 1;
+ }
+ if ((pfd.revents & POLLIN) == 0)
+ return 1;
+
+ socklen = sizeof(*addr);
+ net_in_fd = accept(fd, (struct sockaddr *) addr, &socklen);
+ if (net_in_fd < 0) {
+ perror("accept");
+ return 1;
+ }
+
+ printf("blktrace: connection from %s\n", inet_ntoa(addr->sin_addr));
+ return 0;
+}
+
+/*
+ * Start here when we are in server mode - just fetch data from the network
+ * and dump to files
+ */
+static int net_server(void)
+{
+ struct device_information *dip;
+ struct thread_information *tip;
+ struct sockaddr_in addr;
+ int fd, opt, i, j;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("server: socket");
+ return 1;
+ }
+
+ opt = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) {
+ perror("setsockopt");
+ return 1;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = htons(net_port);
+
+ if (bind(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+ perror("bind");
+ return 1;
+ }
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ return 1;
+ }
+
+repeat:
+ if (get_connection(fd, &addr))
+ return 0;
+
+ while (!is_done()) {
+ if (net_server_loop(&addr.sin_addr))
+ break;
+ }
+
+ for_each_dip(dip, i)
+ for_each_tip(dip, tip, j)
+ tip_ftrunc_final(tip);
+
+ show_stats();
+
+ if (is_done())
+ return 0;
+
+ /*
+ * cleanup for next run
+ */
+ for_each_dip(dip, i) {
+ for_each_tip(dip, tip, j)
+ fclose(tip->ofile);
+
+ free(dip->threads);
+ free(dip->path);
+ }
+
+ free(device_information);
+ device_information = NULL;
+ ncpus = ndevs = 0;
+
+ close(net_in_fd);
+ net_in_fd = -1;
+ stat_shown = 0;
+ goto repeat;
+}
+
+/*
+ * Setup outgoing network connection where we will transmit data
+ */
+static int net_setup_client(void)
+{
+ struct sockaddr_in addr;
+ int fd;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("client: socket");
+ return 1;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(net_port);
+
+ if (inet_aton(hostname, &addr.sin_addr) != 1) {
+ struct hostent *hent = gethostbyname(hostname);
+ if (!hent) {
+ perror("gethostbyname");
+ return 1;
+ }
+
+ memcpy(&addr.sin_addr, hent->h_addr, 4);
+ strcpy(hostname, hent->h_name);
+ }
+
+ printf("blktrace: connecting to %s\n", hostname);
+
+ if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
+ perror("client: connect");
+ return 1;
+ }
+
+ printf("blktrace: connected!\n");
+ net_out_fd = fd;
+ return 0;
+}
+
static char usage_str[] = \
"-d <dev> [ -r relay path ] [ -o <output> ] [-k ] [ -w time ]\n" \
"[ -a action ] [ -A action mask ] [ -v ]\n\n" \
"\t-A Give trace mask as a single value. See documentation\n" \
"\t-b Sub buffer size in KiB\n" \
"\t-n Number of sub buffers\n" \
- "\t-v Print program version info\n\n";
+ "\t-l Run in network listen mode (blktrace server)\n" \
+ "\t-h Run in network client mode, connecting to the given host\n" \
+ "\t-p Network port to use (default 8462)\n" \
+ "\t-s Make the network client use sendfile() to transfer data\n" \
+ "\t-V Print program version info\n\n";
static void show_usage(char *program)
{
fprintf(stderr, "Usage: %s %s %s",program, blktrace_version, usage_str);
}
-static void handle_sigint(__attribute__((__unused__)) int sig)
-{
- done = 1;
-}
int main(int argc, char *argv[])
{
case 'D':
output_dir = optarg;
break;
+ case 'h':
+ net_mode = Net_client;
+ strcpy(hostname, optarg);
+ break;
+ case 'l':
+ net_mode = Net_server;
+ break;
+ case 'p':
+ net_port = atoi(optarg);
+ break;
+ case 's':
+ net_use_sendfile = 1;
+ break;
default:
show_usage(argv[0]);
return 1;
}
}
+ setlocale(LC_NUMERIC, "en_US");
+
+ page_size = getpagesize();
+
+ if (net_mode == Net_server)
+ return net_server();
+
while (optind < argc) {
if (resize_devices(argv[optind++]) != 0)
return 1;
return 0;
}
- setlocale(LC_NUMERIC, "en_US");
-
ncpus = sysconf(_SC_NPROCESSORS_ONLN);
if (ncpus < 0) {
fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed\n");
return 1;
}
- if (start_devices() != 0)
- return 1;
-
signal(SIGINT, handle_sigint);
signal(SIGHUP, handle_sigint);
signal(SIGTERM, handle_sigint);
signal(SIGALRM, handle_sigint);
+ if (net_mode == Net_client && net_setup_client())
+ return 1;
+
+ if (start_devices() != 0)
+ return 1;
+
atexit(stop_all_tracing);
if (stop_watch)
alarm(stop_watch);
- get_and_write_events();
+ wait_for_threads();
if (!is_trace_stopped()) {
trace_stopped = 1;