Net_client,
};
+/*
+ * States a tracer thread reports back to the main thread via
+ * tracer_signal_ready(); tallied in nthreads_running/leaving/error.
+ */
+enum thread_status {
+	Th_running,	/* thread started and is processing events */
+	Th_leaving,	/* thread is exiting */
+	Th_error	/* thread failed during startup */
+};
+
/*
* Generic stats collected: nevents can be _roughly_ estimated by data_read
* (discounting pdu...)
char *path; /* path to device special file */
char *buts_name; /* name returned from bt kernel code */
struct pdc_stats *stats;
- int fd, idx, ncpus;
+ int fd, ncpus;
unsigned long long drops;
/*
struct cl_host *ch;
u32 cl_id;
time_t cl_connect_time;
+ int setup_done; /* ioctl BLKTRACESETUP done */
struct io_info *ios;
};
struct io_info *ios;
struct pollfd *pfds;
pthread_t thread;
- pthread_mutex_t mutex;
- pthread_cond_t cond;
int cpu, nios;
- volatile int running, status, is_done;
+ volatile int status, is_done;
};
/*
*/
int data_is_native = -1;
+static int ndevs;
+static int max_cpus;
static int ncpus;
+static cpu_set_t *online_cpus;
static int pagesize;
static int act_mask = ~0U;
+static int kill_running_trace;
+static int stop_watch;
+static int piped_output;
+
static char *debugfs_path = "/sys/kernel/debug";
static char *output_name;
static char *output_dir;
-static int kill_running_trace;
-static int stop_watch;
+
static unsigned long buf_size = BUF_SIZE;
static unsigned long buf_nr = BUF_NR;
+
+static FILE *pfp;
+
static LIST_HEAD(devpaths);
static LIST_HEAD(tracers);
-static int ndevs;
+
static volatile int done;
-static FILE *pfp;
-static int piped_output;
-static int ntracers;
/*
* tracer threads add entries, the main thread takes them off and processes
static volatile int dp_entries;
/*
- * This synchronizes the starting of trace gathering amongst all tracer
- * threads.
+ * These synchronize master / thread interactions.
*/
-static pthread_cond_t ub_cond = PTHREAD_COND_INITIALIZER;
-static pthread_mutex_t ub_mutex = PTHREAD_MUTEX_INITIALIZER;
-static volatile int unblock_tracers;
+static pthread_cond_t mt_cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t mt_mutex = PTHREAD_MUTEX_INITIALIZER;
+static volatile int nthreads_running;
+static volatile int nthreads_leaving;
+static volatile int nthreads_error;
+static volatile int tracers_run;
/*
* network cmd line params
}
};
-static char usage_str[] = \
- "-d <dev> [ -r debugfs path ] [ -o <output> ] [-k ] [ -w time ]\n" \
- "[ -a action ] [ -A action mask ] [ -I <devs file> ] [ -v ]\n\n" \
+static char usage_str[] = "\n\n" \
+ "-d <dev> | --dev=<dev>\n" \
+ "[ -r <debugfs path> | --relay=<debugfs path> ]\n" \
+ "[ -o <file> | --output=<file>]\n" \
+ "[ -D <dir> | --output-dir=<dir>\n" \
+ "[ -w <time> | --stopwatch=<time>]\n" \
+ "[ -a <action field> | --act-mask=<action field>]\n" \
+ "[ -A <action mask> | --set-mask=<action mask>]\n" \
+ "[ -b <size> | --buffer-size]\n" \
+ "[ -n <number> | --num-sub-buffers=<number>]\n" \
+ "[ -l | --listen]\n" \
+ "[ -h <hostname> | --host=<hostname>]\n" \
+ "[ -p <port number> | --port=<port number>]\n" \
+ "[ -s | --no-sendfile]\n" \
+ "[ -I <devs file> | --input-devs=<devs file>]\n" \
+	"[ -v | --version]\n" \
+	"[ -V | --version]\n" \
+
"\t-d Use specified device. May also be given last after options\n" \
"\t-r Path to mounted debugfs, defaults to /sys/kernel/debug\n" \
"\t-o File(s) to send output to\n" \
"\t-D Directory to prepend to output file names\n" \
- "\t-k Kill a running trace\n" \
"\t-w Stop after defined time, in seconds\n" \
"\t-a Only trace specified actions. See documentation\n" \
"\t-A Give trace mask as a single value. See documentation\n" \
- "\t-b Sub buffer size in KiB\n" \
- "\t-n Number of sub buffers\n" \
+ "\t-b Sub buffer size in KiB (default 512)\n" \
+ "\t-n Number of sub buffers (default 4)\n" \
"\t-l Run in network listen mode (blktrace server)\n" \
"\t-h Run in network client mode, connecting to the given host\n" \
"\t-p Network port to use (default 8462)\n" \
"\t-s Make the network client NOT use sendfile() to transfer data\n" \
"\t-I Add devices found in <devs file>\n" \
+ "\t-v Print program version info\n" \
"\t-V Print program version info\n\n";
static void clear_events(struct pollfd *pfd)
static void show_usage(char *prog)
{
- fprintf(stderr, "Usage: %s %s %s", prog, blktrace_version, usage_str);
+ fprintf(stderr, "Usage: %s %s", prog, usage_str);
+}
+
+/*
+ * Create a timespec 'delta_msec' milliseconds into the future,
+ * suitable for pthread_cond_timedwait().
+ */
+static inline void make_timespec(struct timespec *tsp, long delta_msec)
+{
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+	tsp->tv_sec = now.tv_sec;
+	tsp->tv_nsec = 1000L * now.tv_usec;
+
+	tsp->tv_nsec += (delta_msec * 1000000L);
+	/*
+	 * tv_nsec must stay in [0, 1e9); exactly 1e9 is also invalid
+	 * (pthread_cond_timedwait would fail with EINVAL), hence >=.
+	 */
+	if (tsp->tv_nsec >= 1000000000L) {
+		long secs = tsp->tv_nsec / 1000000000L;
+
+		tsp->tv_sec += secs;
+		tsp->tv_nsec -= (secs * 1000000000L);
+	}
+}
+
+/*
+ * Add a timer to ensure wait ends
+ *
+ * Bounds every condition wait to 50ms so the callers' loops stay live;
+ * callers re-check their predicate, so a spurious timeout is harmless.
+ */
+static void t_pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
+{
+	struct timespec ts;
+
+	make_timespec(&ts, 50);
+	pthread_cond_timedwait(cond, mutex, &ts);
+}
+
+/*
+ * Main thread: release all tracers parked in tracer_wait_unblock()
+ * by setting tracers_run under the shared mutex and broadcasting.
+ */
+static void unblock_tracers(void)
+{
+	pthread_mutex_lock(&mt_mutex);
+	tracers_run = 1;
+	pthread_cond_broadcast(&mt_cond);
+	pthread_mutex_unlock(&mt_mutex);
+}
+
+/*
+ * Tracer thread: block until the main thread sets tracers_run
+ * (unblock_tracers()) or this tracer is told to quit (tp->is_done).
+ */
+static void tracer_wait_unblock(struct tracer *tp)
+{
+	pthread_mutex_lock(&mt_mutex);
+	while (!tp->is_done && !tracers_run)
+		pthread_cond_wait(&mt_cond, &mt_mutex);
+	pthread_mutex_unlock(&mt_mutex);
+}
+
+/*
+ * Tracer thread: record this thread's state ('status' is stashed in
+ * tp->status) and bump the matching global counter so the main
+ * thread's wait_tracers_ready()/wait_tracers_leaving() can progress.
+ */
+static void tracer_signal_ready(struct tracer *tp,
+				enum thread_status th_status,
+				int status)
+{
+	pthread_mutex_lock(&mt_mutex);
+	tp->status = status;
+
+	if (th_status == Th_running)
+		nthreads_running++;
+	else if (th_status == Th_error)
+		nthreads_error++;
+	else
+		nthreads_leaving++;
+
+	pthread_cond_signal(&mt_cond);
+	pthread_mutex_unlock(&mt_mutex);
+}
+
+/*
+ * Main thread: wait until every started tracer has reported either
+ * Th_running or Th_error via tracer_signal_ready().
+ */
+static void wait_tracers_ready(int ncpus_started)
+{
+	pthread_mutex_lock(&mt_mutex);
+	while ((nthreads_running + nthreads_error) < ncpus_started)
+		t_pthread_cond_wait(&mt_cond, &mt_mutex);
+	pthread_mutex_unlock(&mt_mutex);
+}
+
+/*
+ * Main thread: wait until every tracer that reported Th_running has
+ * reported Th_leaving on its way out.
+ */
+static void wait_tracers_leaving(void)
+{
+	pthread_mutex_lock(&mt_mutex);
+	while (nthreads_leaving < nthreads_running)
+		t_pthread_cond_wait(&mt_cond, &mt_mutex);
+	pthread_mutex_unlock(&mt_mutex);
}
static void init_mmap_info(struct mmap_info *mip)
static int lock_on_cpu(int cpu)
{
-	cpu_set_t cpu_mask;
+	cpu_set_t *cpu_mask;
+	size_t size;
-	CPU_ZERO(&cpu_mask);
-	CPU_SET(cpu, &cpu_mask);
-	if (sched_setaffinity(getpid(), sizeof(cpu_mask), &cpu_mask) < 0)
+	/* Dynamic set: 'cpu' may exceed the static cpu_set_t capacity */
+	cpu_mask = CPU_ALLOC(max_cpus);
+	if (!cpu_mask)
+		return errno;	/* ENOMEM from a failed CPU_ALLOC */
+	size = CPU_ALLOC_SIZE(max_cpus);
+
+	CPU_ZERO_S(size, cpu_mask);
+	CPU_SET_S(cpu, size, cpu_mask);
+	if (sched_setaffinity(0, size, cpu_mask) < 0) {
+		CPU_FREE(cpu_mask);
		return errno;
+	}
+	CPU_FREE(cpu_mask);
	return 0;
}
-/*
- * Create a timespec 'msec' milliseconds into the future
- */
-static inline void make_timespec(struct timespec *tsp, long delta_msec)
-{
- struct timeval now;
-
- gettimeofday(&now, NULL);
- tsp->tv_sec = now.tv_sec;
- tsp->tv_nsec = 1000L * now.tv_usec;
-
- tsp->tv_nsec += (delta_msec * 1000000L);
- if (tsp->tv_nsec > 1000000000L) {
- long secs = tsp->tv_nsec / 1000000000L;
-
- tsp->tv_sec += secs;
- tsp->tv_nsec -= (secs * 1000000000L);
- }
-}
-
static int increase_limit(int resource, rlim_t increase)
{
struct rlimit rlim;
return new;
}
-static int my_mlock(const void *addr, size_t len)
+/*
+ * mlock() with retries (handle_mem_failure() decides whether a failure
+ * is retryable).  With a tracer 'tp' supplied, give up after ~10
+ * attempts once the tracer has been told to stop; 'tp' may be NULL,
+ * in which case retries continue as before.
+ */
+static int my_mlock(struct tracer *tp,
+		    const void *addr, size_t len)
{
-	int ret;
+	int ret, retry = 0;
	do {
		ret = mlock(addr, len);
+		if ((retry >= 10) && tp && tp->is_done)
+			break;
+		retry++;
	} while (ret < 0 && handle_mem_failure(len));
	return ret;
}
+/*
+ * Ensure the output file's mapped window can hold at least 'maxlen'
+ * more bytes, growing it (ftruncate + mmap + mlock) when needed.
+ * 'tp' (may be NULL) lets my_mlock() abandon retries once the tracer
+ * is shutting down.  Returns 0 on success, 1 on error.
+ */
+static int setup_mmap(int fd, unsigned int maxlen,
+		      struct mmap_info *mip,
+		      struct tracer *tp)
+{
+	if (mip->fs_off + maxlen > mip->fs_buf_len) {
+		unsigned long nr = max(16, mip->buf_nr);
+
+		if (mip->fs_buf) {
+			munlock(mip->fs_buf, mip->fs_buf_len);
+			munmap(mip->fs_buf, mip->fs_buf_len);
+			mip->fs_buf = NULL;
+		}
+
+		/* Keep the mapping offset page-aligned */
+		mip->fs_off = mip->fs_size & (mip->pagesize - 1);
+		mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
+		mip->fs_max_size += mip->fs_buf_len;
+
+		if (ftruncate(fd, mip->fs_max_size) < 0) {
+			perror("setup_mmap: ftruncate");
+			return 1;
+		}
+
+		mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
+				      MAP_SHARED, fd,
+				      mip->fs_size - mip->fs_off);
+		if (mip->fs_buf == MAP_FAILED) {
+			perror("setup_mmap: mmap");
+			return 1;
+		}
+		if (my_mlock(tp, mip->fs_buf, mip->fs_buf_len) < 0) {
+			perror("setup_mmap: mlock");
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
static int __stop_trace(int fd)
{
/*
if (ret == 0)
break;
else if (ret < 0) {
- if (errno != EAGAIN) {
- perror("server: net_recv_data: recv failed");
- break;
- } else
- break;
+ if (errno == EAGAIN) {
+ usleep(50);
+ continue;
+ }
+ perror("server: net_recv_data: recv failed");
+ break;
} else {
buf += ret;
bytes_left -= ret;
memset(&hdr, 0, sizeof(hdr));
hdr.magic = BLK_IO_TRACE_MAGIC;
+ memset(hdr.buts_name, 0, sizeof(hdr.buts_name));
strncpy(hdr.buts_name, buts_name, sizeof(hdr.buts_name));
- hdr.buts_name[sizeof(hdr.buts_name)-1] = '\0';
+ hdr.buts_name[sizeof(hdr.buts_name) - 1] = '\0';
hdr.cpu = cpu;
- hdr.max_cpus = ncpus;
+ hdr.max_cpus = max_cpus;
hdr.len = len;
hdr.cl_id = getpid();
hdr.buf_size = buf_size;
/*
* Returns:
- * 0: "EOF"
- * 1: OK
- * -1: Error
+ * 0: "EOF"
+ * 1: OK
+ * -1: Error
*/
static int net_get_header(struct cl_conn *nc, struct blktrace_net_hdr *bnh)
{
return 1;
else if (bytes_read == 0)
return 0;
- return -1;
+ else
+ return -1;
}
static int net_setup_addr(void)
}
memcpy(&addr->sin_addr, hent->h_addr, 4);
- strcpy(hostname, hent->h_name);
+ memset(hostname, 0, sizeof(hostname));
+ strncpy(hostname, hent->h_name, sizeof(hostname));
+ hostname[sizeof(hostname) - 1] = '\0';
}
return 0;
hostname);
else
perror("client: connect");
+
close(fd);
return -1;
}
static int open_client_connections(void)
{
int cpu;
+ size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
cl_fds = calloc(ncpus, sizeof(*cl_fds));
- for (cpu = 0; cpu < ncpus; cpu++) {
+ for (cpu = 0; cpu < max_cpus; cpu++) {
+ if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
+ continue;
cl_fds[cpu] = net_setup_client();
if (cl_fds[cpu] < 0)
goto err;
{
if (cl_fds) {
int cpu, *fdp;
+ size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
- for (cpu = 0, fdp = cl_fds; cpu < ncpus; cpu++, fdp++) {
+ for (cpu = 0, fdp = cl_fds; cpu < max_cpus; cpu++, fdp++) {
+ if (!CPU_ISSET_S(cpu, alloc_size, online_cpus))
+ continue;
if (*fdp >= 0) {
net_send_drops(*fdp);
net_close_connection(fdp);
}
}
-static void setup_buts(void)
+static int setup_buts(void)
{
struct list_head *p;
+ int ret = 0;
__list_for_each(p, &devpaths) {
struct blk_user_trace_setup buts;
buts.buf_nr = buf_nr;
buts.act_mask = act_mask;
- if (ioctl(dpp->fd, BLKTRACESETUP, &buts) < 0) {
- fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
- dpp->path, errno, strerror(errno));
- }
- else {
- dpp->ncpus = ncpus;
+ if (ioctl(dpp->fd, BLKTRACESETUP, &buts) >= 0) {
+ dpp->ncpus = max_cpus;
dpp->buts_name = strdup(buts.name);
+ dpp->setup_done = 1;
if (dpp->stats)
free(dpp->stats);
dpp->stats = calloc(dpp->ncpus, sizeof(*dpp->stats));
memset(dpp->stats, 0, dpp->ncpus * sizeof(*dpp->stats));
+ } else {
+ fprintf(stderr, "BLKTRACESETUP(2) %s failed: %d/%s\n",
+ dpp->path, errno, strerror(errno));
+ ret++;
}
}
+
+ return ret;
}
static void start_buts(void)
__list_for_each(p, &devpaths) {
struct devpath *dpp = list_entry(p, struct devpath, head);
+
dpp->drops = get_drops(dpp);
}
}
int cpu;
struct tracer_devpath_head *hd;
- for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
+ for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
if (hd->prev)
free(hd->prev);
+
pthread_mutex_destroy(&hd->mutex);
}
free(dpp->heads);
struct tracer_devpath_head *hd;
struct devpath *dpp = list_entry(p, struct devpath, head);
- dpp->heads = calloc(ncpus, sizeof(struct tracer_devpath_head));
- for (cpu = 0, hd = dpp->heads; cpu < ncpus; cpu++, hd++) {
+ dpp->heads = calloc(max_cpus, sizeof(struct tracer_devpath_head));
+ for (cpu = 0, hd = dpp->heads; cpu < max_cpus; cpu++, hd++) {
INIT_LIST_HEAD(&hd->head);
pthread_mutex_init(&hd->mutex, NULL);
hd->prev = NULL;
pthread_mutex_unlock(&dp_mutex);
}
+/* Main thread: subtract 'handled' processed entries under dp_mutex. */
+static void decr_entries(int handled)
+{
+	pthread_mutex_lock(&dp_mutex);
+	dp_entries -= handled;
+	pthread_mutex_unlock(&dp_mutex);
+}
+
+/*
+ * Main thread: sleep until tracers queue some entries (dp_entries != 0)
+ * or we are shutting down.  Returns non-zero while processing should
+ * continue.
+ */
+static int wait_empty_entries(void)
+{
+	pthread_mutex_lock(&dp_mutex);
+	while (!done && dp_entries == 0)
+		t_pthread_cond_wait(&dp_cond, &dp_mutex);
+	pthread_mutex_unlock(&dp_mutex);
+
+	return !done;
+}
+
static int add_devpath(char *path)
{
int fd;
struct devpath *dpp;
+ struct list_head *p;
+ /*
+ * Verify device is not duplicated
+ */
+ __list_for_each(p, &devpaths) {
+ struct devpath *tmp = list_entry(p, struct devpath, head);
+ if (!strcmp(tmp->path, path))
+ return 0;
+ }
/*
* Verify device is valid before going too far
*/
memset(dpp, 0, sizeof(*dpp));
dpp->path = strdup(path);
dpp->fd = fd;
- dpp->idx = ndevs++;
+ ndevs++;
list_add_tail(&dpp->head, &devpaths);
return 0;
struct devpath *dpp = list_entry(p, struct devpath, head);
list_del(&dpp->head);
- __stop_trace(dpp->fd);
+ if (dpp->setup_done)
+ __stop_trace(dpp->fd);
close(dpp->fd);
if (dpp->heads)
if (net_send_header(fd, tbp->cpu, dpp->buts_name, tbp->len))
return 1;
-
- if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
+ else if (net_send_data(fd, tbp->buf, tbp->len) != tbp->len)
return 1;
return 0;
return entries_handled;
}
+/*
+ * Tack 'tbp's buf onto the tail of 'prev's buf
+ *
+ * Frees 'tbp' and returns the (possibly reallocated) combined buffer.
+ */
+static struct trace_buf *tb_combine(struct trace_buf *prev,
+				    struct trace_buf *tbp)
+{
+	unsigned long tot_len;
+
+	tot_len = prev->len + tbp->len;
+	if (tot_len > buf_size) {
+		/*
+		 * tbp->head isn't connected (it was 'prev'
+		 * so it had been taken off of the list
+		 * before). Therefore, we can realloc
+		 * the whole structures, as the other fields
+		 * are "static".
+		 */
+		struct trace_buf *tmp;
+
+		tmp = realloc(prev, sizeof(*prev) + tot_len);
+		if (!tmp) {
+			/*
+			 * Out of memory: drop the new chunk rather than
+			 * dereference NULL below; 'prev' is untouched by
+			 * a failed realloc and remains valid.
+			 */
+			free(tbp);
+			return prev;
+		}
+		prev = tmp;
+		prev->buf = (void *)(prev + 1);
+	}
+
+	memcpy(prev->buf + prev->len, tbp->buf, tbp->len);
+	prev->len = tot_len;
+
+	free(tbp);
+	return prev;
+}
+
static int handle_list_file(struct tracer_devpath_head *hd,
struct list_head *list)
{
* If there was some leftover before, tack this new
* entry onto the tail of the previous one.
*/
- if (prev) {
- unsigned long tot_len;
- struct trace_buf *tmp = tbp;
-
- tbp = prev;
- prev = NULL;
-
- tot_len = tbp->len + tmp->len;
- if (tot_len > buf_size) {
- /*
- * tbp->head isn't connected (it was 'prev'
- * so it had been taken off of the list
- * before). Therefore, we can realloc
- * the whole structures, as the other fields
- * are "static".
- */
- tbp = realloc(tbp->buf, sizeof(*tbp) + tot_len);
- tbp->buf = (void *)(tbp + 1);
- }
-
- memcpy(tbp->buf + tbp->len, tmp->buf, tmp->len);
- tbp->len = tot_len;
-
- free(tmp);
- }
+ if (prev)
+ tbp = tb_combine(prev, tbp);
/*
* See how many whole traces there are - send them
* for the next pass.
*/
if (off) {
- if (write_data(tbp->buf, off) || off == tbp->len)
+ if (write_data(tbp->buf, off) || off == tbp->len) {
free(tbp);
+ prev = NULL;
+ }
else {
/*
* Move valid data to beginning of buffer
struct devpath *dpp = list_entry(p, struct devpath, head);
struct tracer_devpath_head *hd = dpp->heads;
- for (cpu = 0; cpu < ncpus; cpu++, hd++) {
+ for (cpu = 0; cpu < max_cpus; cpu++, hd++) {
pthread_mutex_lock(&hd->mutex);
if (list_empty(&hd->head)) {
pthread_mutex_unlock(&hd->mutex);
}
}
- if (handled) {
- pthread_mutex_lock(&dp_mutex);
- dp_entries -= handled;
- pthread_mutex_unlock(&dp_mutex);
- }
+ if (handled)
+ decr_entries(handled);
}
static void process_trace_bufs(void)
{
- while (!done) {
- pthread_mutex_lock(&dp_mutex);
- while (!done && dp_entries == 0) {
- struct timespec ts;
-
- make_timespec(&ts, 50);
- pthread_cond_timedwait(&dp_cond, &dp_mutex, &ts);
- }
- pthread_mutex_unlock(&dp_mutex);
-
+ while (wait_empty_entries())
__process_trace_bufs();
- }
}
static void clean_trace_bufs(void)
return net_sendfile(iop);
}
-static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
+/*
+ * Build the per-cpu output file name
+ * "<output_dir>/<subdir><name>.blktrace.<cpu>" into 'dst' (capacity
+ * 'dstlen'), creating the destination directory when missing.
+ * Returns 0 on success, 1 on error (including a too-long path).
+ */
+static int fill_ofname(char *dst, int dstlen, char *subdir, char *buts_name,
+		       int cpu)
{
+	int len;
	struct stat sb;
-	int i, nentries = 0;
-	struct pdc_stats *sp;
-	struct pollfd *pfd = tp->pfds;
-	struct io_info *iop = tp->ios;
-	for (i = 0; nevs > 0 && i < ndevs; i++, pfd++, iop++, sp++) {
-		if (pfd->revents & POLLIN || force_read) {
-			if (fstat(iop->ifd, &sb) < 0) {
-				perror(iop->ifn);
-				pfd->events = 0;
-			} else if (sb.st_size > (off_t)iop->data_queued) {
-				iop->ready = sb.st_size - iop->data_queued;
-				iop->data_queued = sb.st_size;
-				if (!net_sendfile_data(tp, iop)) {
-					pdc_dr_update(iop->dpp, tp->cpu,
-						      iop->ready);
-					nentries++;
-				} else
-					clear_events(pfd);
-			}
-			nevs--;
+	if (output_dir)
+		len = snprintf(dst, dstlen, "%s/", output_dir);
+	else
+		len = snprintf(dst, dstlen, "./");
+
+	/*
+	 * snprintf returns the would-be length: if the directory part
+	 * alone overflowed 'dst', bail out before (dstlen - len) goes
+	 * negative and wraps to a huge size below.
+	 */
+	if (len >= dstlen) {
+		fprintf(stderr, "Destination directory name too long\n");
+		return 1;
+	}
+
+	if (subdir) {
+		len += snprintf(dst + len, dstlen - len, "%s", subdir);
+		if (len >= dstlen) {
+			fprintf(stderr,
+				"Destination directory name too long\n");
+			return 1;
+		}
+	}
+
+	if (stat(dst, &sb) < 0) {
+		if (errno != ENOENT) {
+			fprintf(stderr,
+				"Destination dir %s stat failed: %d/%s\n",
+				dst, errno, strerror(errno));
+			return 1;
+		}
+		/*
+		 * There is no synchronization between multiple threads
+		 * trying to create the directory at once. It's harmless
+		 * to let them try, so just detect the problem and move on.
+		 */
+		if (mkdir(dst, 0755) < 0 && errno != EEXIST) {
+			fprintf(stderr,
+				"Destination dir %s can't be made: %d/%s\n",
+				dst, errno, strerror(errno));
+			return 1;
		}
	}
-	if (nentries)
-		incr_entries(nentries);
+	if (output_name)
+		snprintf(dst + len, dstlen - len, "%s.blktrace.%d",
+			 output_name, cpu);
+	else
+		snprintf(dst + len, dstlen - len, "%s.blktrace.%d",
+			 buts_name, cpu);
-	return nentries;
+	return 0;
}
-static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
+static int set_vbuf(struct io_info *iop, int mode, size_t size)
{
- int i, nentries = 0;
- struct trace_buf *tbp;
- struct pollfd *pfd = tp->pfds;
- struct io_info *iop = tp->ios;
+ iop->obuf = malloc(size);
+ if (setvbuf(iop->ofp, iop->obuf, mode, size) < 0) {
+ fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
+ iop->dpp->path, (int)size, errno,
+ strerror(errno));
+ free(iop->obuf);
+ return 1;
+ }
- tbp = alloc_trace_buf(tp->cpu, buf_size);
- for (i = 0; i < ndevs; i++, pfd++, iop++) {
- if (pfd->revents & POLLIN || force_read) {
- tbp->len = read(iop->ifd, tbp->buf, buf_size);
- if (tbp->len > 0) {
- pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
- add_trace_buf(iop->dpp, tp->cpu, &tbp);
- nentries++;
- } else if (tbp->len == 0) {
- /*
- * Short reads after we're done stop us
- * from trying reads.
- */
- if (tp->is_done)
- clear_events(pfd);
- } else {
- read_err(tp->cpu, iop->ifn);
- if (errno != EAGAIN || tp->is_done)
- clear_events(pfd);
- }
- if (!piped_output && --nevs == 0)
- break;
- }
- }
- free(tbp);
-
- if (nentries)
- incr_entries(nentries);
-
- return nentries;
+ return 0;
}
-static int fill_ofname(struct io_info *iop, int cpu)
+static int iop_open(struct io_info *iop, int cpu)
{
- int len;
- struct stat sb;
- char *dst = iop->ofn;
-
- if (output_dir)
- len = snprintf(iop->ofn, sizeof(iop->ofn), "%s/", output_dir);
- else
- len = snprintf(iop->ofn, sizeof(iop->ofn), "./");
+ char hostdir[MAXPATHLEN + 64];
+ iop->ofd = -1;
if (net_mode == Net_server) {
struct cl_conn *nc = iop->nc;
+ int len;
- len += sprintf(dst + len, "%s-", nc->ch->hostname);
- len += strftime(dst + len, 64, "%F-%T/",
+ len = snprintf(hostdir, sizeof(hostdir), "%s-",
+ nc->ch->hostname);
+ len += strftime(hostdir + len, sizeof(hostdir) - len, "%F-%T/",
gmtime(&iop->dpp->cl_connect_time));
+ } else {
+ hostdir[0] = 0;
}
- if (stat(iop->ofn, &sb) < 0) {
- if (errno != ENOENT) {
- fprintf(stderr,
- "Destination dir %s stat failed: %d/%s\n",
- iop->ofn, errno, strerror(errno));
- return 1;
- }
- if (mkdir(iop->ofn, 0755) < 0) {
- fprintf(stderr,
- "Destination dir %s can't be made: %d/%s\n",
- iop->ofn, errno, strerror(errno));
- return 1;
- }
- }
-
- if (output_name)
- snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
- output_name, cpu);
- else
- snprintf(iop->ofn + len, sizeof(iop->ofn), "%s.blktrace.%d",
- iop->dpp->buts_name, cpu);
-
- return 0;
-}
-
-static int set_vbuf(struct io_info *iop, int mode, size_t size)
-{
- iop->obuf = malloc(size);
- if (setvbuf(iop->ofp, iop->obuf, mode, size) < 0) {
- fprintf(stderr, "setvbuf(%s, %d) failed: %d/%s\n",
- iop->dpp->path, (int)size, errno,
- strerror(errno));
- free(iop->obuf);
- return 1;
- }
-
- return 0;
-}
-
-static int iop_open(struct io_info *iop, int cpu)
-{
- iop->ofd = -1;
- if (fill_ofname(iop, cpu))
+ if (fill_ofname(iop->ofn, sizeof(iop->ofn), hostdir,
+ iop->dpp->buts_name, cpu))
return 1;
iop->ofp = my_fopen(iop->ofn, "w+");
iop->ofn, errno, strerror(errno));
return 1;
}
+
if (set_vbuf(iop, _IOLBF, FILE_VBUF_SIZE)) {
fprintf(stderr, "set_vbuf for file %s failed: %d/%s\n",
iop->ofn, errno, strerror(errno));
return 0;
}
+/*
+ * Tear down one per-cpu output file: unmap its window, trim the file
+ * back to the bytes actually written (skipped for piped output, which
+ * has no seekable file), then close the stream and free its stdio
+ * buffer.
+ */
+static void close_iop(struct io_info *iop)
+{
+	struct mmap_info *mip = &iop->mmap_info;
+
+	if (mip->fs_buf)
+		munmap(mip->fs_buf, mip->fs_buf_len);
+
+	if (!piped_output) {
+		if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
+			fprintf(stderr,
+				"Ignoring err: ftruncate(%s): %d/%s\n",
+				iop->ofn, errno, strerror(errno));
+		}
+	}
+
+	if (iop->ofp)
+		fclose(iop->ofp);
+	if (iop->obuf)
+		free(iop->obuf);
+}
+
+/*
+ * Close all of a tracer's io_info entries: snapshot final drop counts,
+ * close input fds, then either close the local output file (ofp) or
+ * send the close message over the network connection (ofd).
+ */
+static void close_ios(struct tracer *tp)
+{
+	while (tp->nios > 0) {
+		struct io_info *iop = &tp->ios[--tp->nios];
+
+		iop->dpp->drops = get_drops(iop->dpp);
+		if (iop->ifd >= 0)
+			close(iop->ifd);
+
+		if (iop->ofp)
+			close_iop(iop);
+		else if (iop->ofd >= 0) {
+			struct devpath *dpp = iop->dpp;
+
+			net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
+			net_close_connection(&iop->ofd);
+		}
+	}
+
+	free(tp->ios);
+	free(tp->pfds);
+}
+
static int open_ios(struct tracer *tp)
{
struct pollfd *pfd;
struct list_head *p;
tp->ios = calloc(ndevs, sizeof(struct io_info));
- tp->pfds = calloc(ndevs, sizeof(struct pollfd));
-
memset(tp->ios, 0, ndevs * sizeof(struct io_info));
+
+ tp->pfds = calloc(ndevs, sizeof(struct pollfd));
memset(tp->pfds, 0, ndevs * sizeof(struct pollfd));
tp->nios = 0;
err:
close(iop->ifd); /* tp->nios _not_ bumped */
+ close_ios(tp);
return 1;
}
-static void close_iop(struct io_info *iop)
-{
- struct mmap_info *mip = &iop->mmap_info;
-
- if (mip->fs_buf)
- munmap(mip->fs_buf, mip->fs_buf_len);
-
- if (!piped_output) {
- if (ftruncate(fileno(iop->ofp), mip->fs_size) < 0) {
- fprintf(stderr,
- "Ignoring err: ftruncate(%s): %d/%s\n",
- iop->ofn, errno, strerror(errno));
- }
- }
-
- if (iop->ofp)
- fclose(iop->ofp);
- if (iop->obuf)
- free(iop->obuf);
-}
-
-static void close_ios(struct tracer *tp)
-{
- while (tp->nios > 0) {
- struct io_info *iop = &tp->ios[--tp->nios];
-
- iop->dpp->drops = get_drops(iop->dpp);
- if (iop->ifd >= 0)
- close(iop->ifd);
-
- if (iop->ofp)
- close_iop(iop);
- else if (iop->ofd >= 0) {
- struct devpath *dpp = iop->dpp;
-
- net_send_close(iop->ofd, dpp->buts_name, dpp->drops);
- net_close_connection(&iop->ofd);
- }
- }
-
- free(tp->ios);
- free(tp->pfds);
-}
-
-static int setup_mmap(int fd, unsigned int maxlen, struct mmap_info *mip)
-{
- if (mip->fs_off + maxlen > mip->fs_buf_len) {
- unsigned long nr = max(16, mip->buf_nr);
-
- if (mip->fs_buf) {
- munlock(mip->fs_buf, mip->fs_buf_len);
- munmap(mip->fs_buf, mip->fs_buf_len);
- mip->fs_buf = NULL;
- }
-
- mip->fs_off = mip->fs_size & (mip->pagesize - 1);
- mip->fs_buf_len = (nr * mip->buf_size) - mip->fs_off;
- mip->fs_max_size += mip->fs_buf_len;
-
- if (ftruncate(fd, mip->fs_max_size) < 0) {
- perror("__setup_mmap: ftruncate");
- return 1;
- }
-
- mip->fs_buf = my_mmap(NULL, mip->fs_buf_len, PROT_WRITE,
- MAP_SHARED, fd,
- mip->fs_size - mip->fs_off);
- if (mip->fs_buf == MAP_FAILED) {
- perror("__setup_mmap: mmap");
- return 1;
- }
- my_mlock(mip->fs_buf, mip->fs_buf_len);
- }
-
- return 0;
-}
-
static int handle_pfds_file(struct tracer *tp, int nevs, int force_read)
{
struct mmap_info *mip;
if (pfd->revents & POLLIN || force_read) {
mip = &iop->mmap_info;
- ret = setup_mmap(iop->ofd, buf_size, mip);
+ ret = setup_mmap(iop->ofd, buf_size, mip, tp);
if (ret < 0) {
pfd->events = 0;
break;
return nentries;
}
-static void *thread_main(void *arg)
+/*
+ * Network-client event handler: for each device with data pending
+ * (POLLIN, or unconditionally when force_read is set), push any newly
+ * accumulated relay-file bytes to the server.  Returns the number of
+ * entries sent.
+ */
+static int handle_pfds_netclient(struct tracer *tp, int nevs, int force_read)
+{
+	struct stat sb;
+	int i, nentries = 0;
+	struct pollfd *pfd = tp->pfds;
+	struct io_info *iop = tp->ios;
+	for (i = 0; i < ndevs; i++, pfd++, iop++) {
+		if (pfd->revents & POLLIN || force_read) {
+			if (fstat(iop->ifd, &sb) < 0) {
+				perror(iop->ifn);
+				pfd->events = 0;
+			} else if (sb.st_size > (off_t)iop->data_queued) {
+				/* New bytes beyond what was already queued */
+				iop->ready = sb.st_size - iop->data_queued;
+				iop->data_queued = sb.st_size;
+
+				if (!net_sendfile_data(tp, iop)) {
+					pdc_dr_update(iop->dpp, tp->cpu,
+						      iop->ready);
+					nentries++;
+				} else
+					clear_events(pfd);
+			}
+			if (--nevs == 0)
+				break;
+		}
+	}
+
+	if (nentries)
+		incr_entries(nentries);
+
+	return nentries;
+}
+
+/*
+ * Local/piped event handler: read up to buf_size bytes from each ready
+ * relay file into a trace_buf and queue it for the main thread.
+ * Returns the number of buffers queued.
+ */
+static int handle_pfds_entries(struct tracer *tp, int nevs, int force_read)
+{
+	int i, nentries = 0;
+	struct trace_buf *tbp;
+	struct pollfd *pfd = tp->pfds;
+	struct io_info *iop = tp->ios;
+
+	tbp = alloc_trace_buf(tp->cpu, buf_size);
+	for (i = 0; i < ndevs; i++, pfd++, iop++) {
+		if (pfd->revents & POLLIN || force_read) {
+			tbp->len = read(iop->ifd, tbp->buf, buf_size);
+			if (tbp->len > 0) {
+				pdc_dr_update(iop->dpp, tp->cpu, tbp->len);
+				/*
+				 * add_trace_buf() takes &tbp — presumably
+				 * hands back a fresh buffer; confirm in its
+				 * definition.
+				 */
+				add_trace_buf(iop->dpp, tp->cpu, &tbp);
+				nentries++;
+			} else if (tbp->len == 0) {
+				/*
+				 * Short reads after we're done stop us
+				 * from trying reads.
+				 */
+				if (tp->is_done)
+					clear_events(pfd);
+			} else {
+				read_err(tp->cpu, iop->ifn);
+				if (errno != EAGAIN || tp->is_done)
+					clear_events(pfd);
+			}
+			if (!piped_output && --nevs == 0)
+				break;
+		}
+	}
+	free(tbp);
+
+	if (nentries)
+		incr_entries(nentries);
+
+	return nentries;
+}
+
+static void *thread_main(void *arg)
+{
+ int ret, ndone, to_val;
struct tracer *tp = arg;
ret = lock_on_cpu(tp->cpu);
goto err;
ret = open_ios(tp);
- if (ret) {
- close_ios(tp);
+ if (ret)
goto err;
- }
-
- pthread_mutex_lock(&tp->mutex);
- tp->running = 1;
- pthread_cond_signal(&tp->cond);
- pthread_mutex_unlock(&tp->mutex);
if (piped_output)
to_val = 50; /* Frequent partial handles */
else
to_val = 500; /* 1/2 second intervals */
- pthread_mutex_lock(&ub_mutex);
- while (!tp->is_done && !unblock_tracers)
- pthread_cond_wait(&ub_cond, &ub_mutex);
- pthread_mutex_unlock(&ub_mutex);
+
+ tracer_signal_ready(tp, Th_running, 0);
+ tracer_wait_unblock(tp);
while (!tp->is_done) {
ndone = poll(tp->pfds, ndevs, to_val);
;
close_ios(tp);
+ tracer_signal_ready(tp, Th_leaving, 0);
+ return NULL;
err:
- pthread_mutex_lock(&tp->mutex);
- tp->running = 0;
- tp->status = ret;
- pthread_cond_signal(&tp->cond);
- pthread_mutex_unlock(&tp->mutex);
+ tracer_signal_ready(tp, Th_error, ret);
return NULL;
}
memset(tp, 0, sizeof(*tp));
INIT_LIST_HEAD(&tp->head);
- pthread_mutex_init(&tp->mutex, NULL);
- pthread_cond_init(&tp->cond, NULL);
- tp->running = 0;
tp->status = 0;
tp->cpu = cpu;
if (pthread_create(&tp->thread, NULL, thread_main, tp)) {
fprintf(stderr, "FAILED to start thread on CPU %d: %d/%s\n",
cpu, errno, strerror(errno));
- goto err;
+ free(tp);
+ return 1;
}
- pthread_mutex_lock(&tp->mutex);
- while (!tp->running && (tp->status == 0))
- pthread_cond_wait(&tp->cond, &tp->mutex);
- pthread_mutex_unlock(&tp->mutex);
+ list_add_tail(&tp->head, &tracers);
+ return 0;
+}
- if (tp->status == 0) {
- list_add_tail(&tp->head, &tracers);
- return 0;
- }
+static int create_output_files(int cpu)
+{
+ char fname[MAXPATHLEN + 64];
+ struct list_head *p;
+ FILE *f;
- fprintf(stderr, "FAILED to start thread on CPU %d\n", cpu);
+ __list_for_each(p, &devpaths) {
+ struct devpath *dpp = list_entry(p, struct devpath, head);
-err:
- pthread_mutex_destroy(&tp->mutex);
- pthread_cond_destroy(&tp->cond);
- free(tp);
- return 1;
+ if (fill_ofname(fname, sizeof(fname), NULL, dpp->buts_name,
+ cpu))
+ return 1;
+ f = my_fopen(fname, "w+");
+ if (!f)
+ return 1;
+ fclose(f);
+ }
+ return 0;
}
-static int start_tracers(void)
+static void start_tracers(void)
{
- int cpu;
+ int cpu, started = 0;
+ struct list_head *p;
+ size_t alloc_size = CPU_ALLOC_SIZE(max_cpus);
- for (cpu = 0; cpu < ncpus; cpu++)
+ for (cpu = 0; cpu < max_cpus; cpu++) {
+ if (!CPU_ISSET_S(cpu, alloc_size, online_cpus)) {
+ /*
+ * Create fake empty output files so that other tools
+ * like blkparse don't have to bother with sparse CPU
+ * number space.
+ */
+ if (create_output_files(cpu))
+ break;
+ continue;
+ }
if (start_tracer(cpu))
break;
+ started++;
+ }
- return cpu;
+ wait_tracers_ready(started);
+
+ __list_for_each(p, &tracers) {
+ struct tracer *tp = list_entry(p, struct tracer, head);
+ if (tp->status)
+ fprintf(stderr,
+ "FAILED to start thread on CPU %d: %d/%s\n",
+ tp->cpu, tp->status, strerror(tp->status));
+ }
}
static void stop_tracers(void)
struct tracer *tp = list_entry(p, struct tracer, head);
tp->is_done = 1;
}
+ pthread_cond_broadcast(&mt_cond);
}
static void del_tracers(void)
list_del(&tp->head);
free(tp);
}
- ntracers = 0;
}
static void wait_tracers(void)
if (use_tracer_devpaths())
process_trace_bufs();
+ wait_tracers_leaving();
+
__list_for_each(p, &tracers) {
int ret;
struct tracer *tp = list_entry(p, struct tracer, head);
- pthread_mutex_lock(&tp->mutex);
- while (tp->running)
- pthread_cond_wait(&tp->cond, &tp->mutex);
- pthread_mutex_unlock(&tp->mutex);
-
ret = pthread_join(tp->thread, NULL);
if (ret)
fprintf(stderr, "Thread join %d failed %d\n",
return 1;
}
- while (fscanf(ifp, "%s\n", dev_line) == 1)
- if (add_devpath(dev_line) != 0)
+ while (fscanf(ifp, "%s\n", dev_line) == 1) {
+ if (add_devpath(dev_line) != 0) {
+ fclose(ifp);
return 1;
+ }
+ }
+ fclose(ifp);
break;
}
break;
case 'h':
net_mode = Net_client;
- strcpy(hostname, optarg);
+ memset(hostname, 0, sizeof(hostname));
+ strncpy(hostname, optarg, sizeof(hostname));
+ hostname[sizeof(hostname) - 1] = '\0';
break;
case 'l':
net_mode = Net_server;
return 1;
}
- if (statfs(debugfs_path, &st) < 0 || st.f_type != (long)DEBUGFS_TYPE) {
+ if (statfs(debugfs_path, &st) < 0) {
fprintf(stderr, "Invalid debug path %s: %d/%s\n",
debugfs_path, errno, strerror(errno));
return 1;
}
+ if (st.f_type != (long)DEBUGFS_TYPE) {
+ fprintf(stderr, "Debugfs is not mounted at %s\n", debugfs_path);
+ return 1;
+ }
+
if (act_mask_tmp != 0)
act_mask = act_mask_tmp;
piped_output = 1;
handle_pfds = handle_pfds_entries;
pfp = stdout;
- setvbuf(pfp, NULL, _IONBF, 0);
+ if (setvbuf(pfp, NULL, _IONBF, 0)) {
+ perror("setvbuf stdout");
+ return 1;
+ }
} else
handle_pfds = handle_pfds_file;
return 0;
struct io_info *iop = &dpp->ios[bnh->cpu];
struct mmap_info *mip = &iop->mmap_info;
- if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info)) {
+ if (setup_mmap(iop->ofd, bnh->len, &iop->mmap_info, NULL)) {
fprintf(stderr, "ncd(%s:%d): mmap failed\n",
nc->ch->hostname, nc->fd);
exit(1);
return ret;
}
+/*
+ * Top-level local-tracing driver: set up the kernel-side traces,
+ * spawn one tracer per online CPU, run until stopped, then collect
+ * results and tear everything down.  Returns 0 on success, 1 on a
+ * setup failure.
+ */
+static int run_tracers(void)
+{
+	atexit(exit_tracing);
+	if (net_mode == Net_client)
+		printf("blktrace: connecting to %s\n", hostname);
+
+	if (setup_buts())
+		return 1;
+
+	if (use_tracer_devpaths()) {
+		if (setup_tracer_devpaths())
+			return 1;
+
+		if (piped_output)
+			handle_list = handle_list_file;
+		else
+			handle_list = handle_list_net;
+	}
+
+	start_tracers();
+	/* Only start kernel tracing if every tracer thread came up */
+	if (nthreads_running == ncpus) {
+		unblock_tracers();
+		start_buts();
+		if (net_mode == Net_client)
+			printf("blktrace: connected!\n");
+		if (stop_watch)
+			alarm(stop_watch);
+	} else
+		stop_tracers();
+
+	wait_tracers();
+	if (nthreads_running == ncpus)
+		show_stats(&devpaths);
+	if (net_client_use_send())
+		close_client_connections();
+	del_tracers();
+
+	return 0;
+}
+
+/*
+ * Parse /sys/devices/system/cpu/online (e.g. "0-3,5") into a
+ * dynamically sized cpu_set_t of online CPUs; also sets the global
+ * max_cpus to the highest online CPU number + 1.
+ *
+ * Returns the allocated set (caller frees with CPU_FREE) or NULL with
+ * errno set on failure.
+ */
+static cpu_set_t *get_online_cpus(void)
+{
+	FILE *cpus;
+	cpu_set_t *set;
+	size_t alloc_size;
+	int cpuid, prevcpuid = -1;
+	char nextch;
+	int n, ncpu, curcpu = 0;
+	int *cpu_nums;
+
+	ncpu = sysconf(_SC_NPROCESSORS_CONF);
+	if (ncpu < 0)
+		return NULL;
+
+	cpu_nums = malloc(sizeof(int)*ncpu);
+	if (!cpu_nums) {
+		errno = ENOMEM;
+		return NULL;
+	}
+
+	/*
+	 * There is no way to easily get maximum CPU number. So we have to
+	 * parse the file first to find it out and then create appropriate
+	 * cpuset
+	 */
+	cpus = my_fopen("/sys/devices/system/cpu/online", "r");
+	if (!cpus)
+		goto err_free;
+	for (;;) {
+		n = fscanf(cpus, "%d%c", &cpuid, &nextch);
+		if (n <= 0)
+			break;
+		if (n == 2 && nextch == '-') {
+			/* Start of a "lo-hi" range */
+			prevcpuid = cpuid;
+			continue;
+		}
+		if (prevcpuid == -1)
+			prevcpuid = cpuid;
+		while (prevcpuid <= cpuid) {
+			/* More CPUs listed than configured? */
+			if (curcpu >= ncpu) {
+				errno = EINVAL;
+				goto err_close;
+			}
+			cpu_nums[curcpu++] = prevcpuid++;
+		}
+		prevcpuid = -1;
+	}
+	fclose(cpus);
+
+	/* An empty/unparsable file would index cpu_nums[-1] below */
+	if (curcpu == 0) {
+		errno = EINVAL;
+		goto err_free;
+	}
+
+	ncpu = curcpu;
+	max_cpus = cpu_nums[ncpu - 1] + 1;
+
+	/* Now that we have maximum cpu number, create a cpuset */
+	set = CPU_ALLOC(max_cpus);
+	if (!set) {
+		errno = ENOMEM;
+		goto err_free;
+	}
+	alloc_size = CPU_ALLOC_SIZE(max_cpus);
+	CPU_ZERO_S(alloc_size, set);
+
+	for (curcpu = 0; curcpu < ncpu; curcpu++)
+		CPU_SET_S(cpu_nums[curcpu], alloc_size, set);
+
+	free(cpu_nums);
+
+	return set;
+
+err_close:
+	fclose(cpus);
+err_free:
+	free(cpu_nums);
+	return NULL;
+}
+
int main(int argc, char *argv[])
{
int ret = 0;
setlocale(LC_NUMERIC, "en_US");
pagesize = getpagesize();
- ncpus = sysconf(_SC_NPROCESSORS_ONLN);
- if (ncpus < 0) {
- fprintf(stderr, "sysconf(_SC_NPROCESSORS_ONLN) failed %d/%s\n",
+ online_cpus = get_online_cpus();
+ if (!online_cpus) {
+ fprintf(stderr, "cannot get online cpus %d/%s\n",
errno, strerror(errno));
ret = 1;
goto out;
+ } else if (handle_args(argc, argv)) {
+ ret = 1;
+ goto out;
}
- if (handle_args(argc, argv)) {
+ ncpus = CPU_COUNT_S(CPU_ALLOC_SIZE(max_cpus), online_cpus);
+ if (ndevs > 1 && output_name && strcmp(output_name, "-") != 0) {
+ fprintf(stderr, "-o not supported with multiple devices\n");
ret = 1;
goto out;
}
fprintf(stderr, "-o ignored in server mode\n");
output_name = NULL;
}
-
ret = net_server();
- } else {
- atexit(exit_tracing);
-
- if (net_mode == Net_client)
- printf("blktrace: connecting to %s\n", hostname);
-
- setup_buts();
-
- if (use_tracer_devpaths()) {
- if (setup_tracer_devpaths())
- goto out;
-
- if (piped_output)
- handle_list = handle_list_file;
- else
- handle_list = handle_list_net;
- }
-
- ntracers = start_tracers();
- if (ntracers != ncpus)
- stop_tracers();
- else {
- /*
- * Let tracers go
- */
- pthread_mutex_lock(&ub_mutex);
- unblock_tracers = 1;
- pthread_cond_broadcast(&ub_cond);
- pthread_mutex_unlock(&ub_mutex);
-
- start_buts();
-
- if (net_mode == Net_client)
- printf("blktrace: connected!\n");
-
- if (stop_watch)
- alarm(stop_watch);
- }
-
- wait_tracers();
- if (ntracers == ncpus)
- show_stats(&devpaths);
-
- if (net_client_use_send())
- close_client_connections();
- del_tracers();
- }
+ } else
+ ret = run_tracers();
out:
if (pfp)