#include "../fio.h"
#include "../hash.h"
+#include "../optgroup.h"
#include <rdma/rdma_cma.h>
-#include <infiniband/arch.h>
#define FIO_RDMA_MAX_IO_DEPTH 512
},
{ .ival = "recv",
.oval = FIO_RDMA_CHA_RECV,
- .help = "Posted Recieve",
+ .help = "Posted Receive",
},
},
.category = FIO_OPT_C_ENGINE,
uint32_t nr; /* client: io depth
server: number of records for memory semantic
*/
+ uint32_t max_bs; /* maximum block size */
struct remote_u rmt_us[FIO_RDMA_MAX_IO_DEPTH];
};
static int client_recv(struct thread_data *td, struct ibv_wc *wc)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
+ unsigned int max_bs;
if (wc->byte_len != sizeof(rd->recv_buf)) {
log_err("Received bogus data, size %d\n", wc->byte_len);
return 1;
}
+ max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
+ if (max_bs > ntohl(rd->recv_buf.max_bs)) {
+ log_err("fio: Server's block size (%d) must be greater than or "
+ "equal to the client's block size (%d)!\n",
+ ntohl(rd->recv_buf.max_bs), max_bs);
+ return 1;
+ }
+
/* store mr info for MEMORY semantic */
if ((rd->rdma_protocol == FIO_RDMA_MEM_WRITE) ||
(rd->rdma_protocol == FIO_RDMA_MEM_READ)) {
rd->rmt_nr = ntohl(rd->recv_buf.nr);
for (i = 0; i < rd->rmt_nr; i++) {
- rd->rmt_us[i].buf = ntohll(rd->recv_buf.rmt_us[i].buf);
+ rd->rmt_us[i].buf = be64_to_cpu(rd->recv_buf.rmt_us[i].buf);
rd->rmt_us[i].rkey = ntohl(rd->recv_buf.rmt_us[i].rkey);
rd->rmt_us[i].size = ntohl(rd->recv_buf.rmt_us[i].size);
static int server_recv(struct thread_data *td, struct ibv_wc *wc)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
+ unsigned int max_bs;
if (wc->wr_id == FIO_RDMA_MAX_IO_DEPTH) {
rd->rdma_protocol = ntohl(rd->recv_buf.mode);
/* CHANNEL semantic, do nothing */
if (rd->rdma_protocol == FIO_RDMA_CHA_SEND)
rd->rdma_protocol = FIO_RDMA_CHA_RECV;
+
+ max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
+ if (max_bs < ntohl(rd->recv_buf.max_bs)) {
+ log_err("fio: Server's block size (%d) must be greater than or "
+ "equal to the client's block size (%d)!\n",
+ ntohl(rd->recv_buf.max_bs), max_bs);
+ return 1;
+ }
+
}
return 0;
static int cq_event_handler(struct thread_data *td, enum ibv_wc_opcode opcode)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct ibv_wc wc;
struct rdma_io_u_data *r_io_u_d;
int ret;
case IBV_WC_RECV:
if (rd->is_client == 1)
- client_recv(td, &wc);
+ ret = client_recv(td, &wc);
else
- server_recv(td, &wc);
+ ret = server_recv(td, &wc);
+
+ if (ret)
+ return -1;
if (wc.wr_id == FIO_RDMA_MAX_IO_DEPTH)
break;
}
rd->cq_event_num++;
}
+
if (ret) {
log_err("fio: poll error %d\n", ret);
return 1;
*/
static int rdma_poll_wait(struct thread_data *td, enum ibv_wc_opcode opcode)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct ibv_cq *ev_cq;
void *ev_ctx;
int ret;
}
ret = cq_event_handler(td, opcode);
- if (ret < 1)
+ if (ret == 0)
goto again;
ibv_ack_cq_events(rd->cq, ret);
static int fio_rdmaio_setup_qp(struct thread_data *td)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct ibv_qp_init_attr init_attr;
int qp_depth = td->o.iodepth * 2; /* 2 times of io depth */
rd->pd = ibv_alloc_pd(rd->cm_id->verbs);
if (rd->pd == NULL) {
- log_err("fio: ibv_alloc_pd fail\n");
+ log_err("fio: ibv_alloc_pd fail: %m\n");
return 1;
}
else
rd->channel = ibv_create_comp_channel(rd->cm_id->verbs);
if (rd->channel == NULL) {
- log_err("fio: ibv_create_comp_channel fail\n");
+ log_err("fio: ibv_create_comp_channel fail: %m\n");
goto err1;
}
rd->cq = ibv_create_cq(rd->cm_id->verbs,
qp_depth, rd, rd->channel, 0);
if (rd->cq == NULL) {
- log_err("fio: ibv_create_cq failed\n");
+ log_err("fio: ibv_create_cq failed: %m\n");
goto err2;
}
if (ibv_req_notify_cq(rd->cq, 0) != 0) {
- log_err("fio: ibv_create_cq failed\n");
+ log_err("fio: ibv_req_notify_cq failed: %m\n");
goto err3;
}
if (rd->is_client == 0) {
if (rdma_create_qp(rd->child_cm_id, rd->pd, &init_attr) != 0) {
- log_err("fio: rdma_create_qp failed\n");
+ log_err("fio: rdma_create_qp failed: %m\n");
goto err3;
}
rd->qp = rd->child_cm_id->qp;
} else {
if (rdma_create_qp(rd->cm_id, rd->pd, &init_attr) != 0) {
- log_err("fio: rdma_create_qp failed\n");
+ log_err("fio: rdma_create_qp failed: %m\n");
goto err3;
}
rd->qp = rd->cm_id->qp;
static int fio_rdmaio_setup_control_msg_buffers(struct thread_data *td)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
rd->recv_mr = ibv_reg_mr(rd->pd, &rd->recv_buf, sizeof(rd->recv_buf),
IBV_ACCESS_LOCAL_WRITE);
if (rd->recv_mr == NULL) {
- log_err("fio: recv_buf reg_mr failed\n");
+ log_err("fio: recv_buf reg_mr failed: %m\n");
return 1;
}
rd->send_mr = ibv_reg_mr(rd->pd, &rd->send_buf, sizeof(rd->send_buf),
0);
if (rd->send_mr == NULL) {
- log_err("fio: send_buf reg_mr failed\n");
+ log_err("fio: send_buf reg_mr failed: %m\n");
ibv_dereg_mr(rd->recv_mr);
return 1;
}
struct rdma_event_channel *channel,
enum rdma_cm_event_type wait_event)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct rdma_cm_event *event;
int ret;
static int fio_rdmaio_prep(struct thread_data *td, struct io_u *io_u)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct rdma_io_u_data *r_io_u_d;
r_io_u_d = io_u->engine_data;
static struct io_u *fio_rdmaio_event(struct thread_data *td, int event)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct io_u *io_u;
int i;
static int fio_rdmaio_getevents(struct thread_data *td, unsigned int min,
unsigned int max, const struct timespec *t)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
enum ibv_wc_opcode comp_opcode;
struct ibv_cq *ev_cq;
void *ev_ctx;
static int fio_rdmaio_send(struct thread_data *td, struct io_u **io_us,
unsigned int nr)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct ibv_send_wr *bad_wr;
#if 0
enum ibv_wc_opcode comp_opcode;
}
if (ibv_post_send(rd->qp, &r_io_u_d->sq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_send fail\n");
+ log_err("fio: ibv_post_send fail: %m\n");
return -1;
}
static int fio_rdmaio_recv(struct thread_data *td, struct io_u **io_us,
unsigned int nr)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct ibv_recv_wr *bad_wr;
struct rdma_io_u_data *r_io_u_d;
int i;
r_io_u_d = io_us[i]->engine_data;
if (ibv_post_recv(rd->qp, &r_io_u_d->rq_wr, &bad_wr) !=
0) {
- log_err("fio: ibv_post_recv fail\n");
+ log_err("fio: ibv_post_recv fail: %m\n");
return 1;
}
}
|| (rd->rdma_protocol == FIO_RDMA_MEM_WRITE)) {
/* re-post the rq_wr */
if (ibv_post_recv(rd->qp, &rd->rq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_recv fail\n");
+ log_err("fio: ibv_post_recv fail: %m\n");
return 1;
}
static int fio_rdmaio_queue(struct thread_data *td, struct io_u *io_u)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
fio_ro_check(td, io_u);
static void fio_rdmaio_queued(struct thread_data *td, struct io_u **io_us,
unsigned int nr)
{
- struct rdmaio_data *rd = td->io_ops->data;
- struct timeval now;
+ struct rdmaio_data *rd = td->io_ops_data;
+ struct timespec now;
unsigned int i;
if (!fio_fill_issue_time(td))
static int fio_rdmaio_commit(struct thread_data *td)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct io_u **io_us;
int ret;
static int fio_rdmaio_connect(struct thread_data *td, struct fio_file *f)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct rdma_conn_param conn_param;
struct ibv_send_wr *bad_wr;
conn_param.retry_count = 10;
if (rdma_connect(rd->cm_id, &conn_param) != 0) {
- log_err("fio: rdma_connect fail\n");
+ log_err("fio: rdma_connect fail: %m\n");
return 1;
}
rd->send_buf.nr = htonl(td->o.iodepth);
if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_send fail");
+ log_err("fio: ibv_post_send fail: %m\n");
return 1;
}
- rdma_poll_wait(td, IBV_WC_SEND);
+ if (rdma_poll_wait(td, IBV_WC_SEND) < 0)
+ return 1;
/* wait for remote MR info from server side */
- rdma_poll_wait(td, IBV_WC_RECV);
+ if (rdma_poll_wait(td, IBV_WC_RECV) < 0)
+ return 1;
/* In SEND/RECV test, it's a good practice to set up the iodepth
* of the RECV side deeper than that of the SEND side to
static int fio_rdmaio_accept(struct thread_data *td, struct fio_file *f)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct rdma_conn_param conn_param;
struct ibv_send_wr *bad_wr;
+ int ret = 0;
/* rdma_accept() - then wait for accept success */
memset(&conn_param, 0, sizeof(conn_param));
conn_param.initiator_depth = 1;
if (rdma_accept(rd->child_cm_id, &conn_param) != 0) {
- log_err("fio: rdma_accept\n");
+ log_err("fio: rdma_accept: %m\n");
return 1;
}
}
/* wait for request */
- rdma_poll_wait(td, IBV_WC_RECV);
+ ret = rdma_poll_wait(td, IBV_WC_RECV) < 0;
if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_send fail");
+ log_err("fio: ibv_post_send fail: %m\n");
return 1;
}
- rdma_poll_wait(td, IBV_WC_SEND);
+ if (rdma_poll_wait(td, IBV_WC_SEND) < 0)
+ return 1;
- return 0;
+ return ret;
}
static int fio_rdmaio_open_file(struct thread_data *td, struct fio_file *f)
static int fio_rdmaio_close_file(struct thread_data *td, struct fio_file *f)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct ibv_send_wr *bad_wr;
/* unregister rdma buffer */
|| (rd->rdma_protocol ==
FIO_RDMA_MEM_READ))) {
if (ibv_post_send(rd->qp, &rd->sq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_send fail");
+ log_err("fio: ibv_post_send fail: %m\n");
return 1;
}
static int fio_rdmaio_setup_connect(struct thread_data *td, const char *host,
unsigned short port)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct ibv_recv_wr *bad_wr;
int err;
static int fio_rdmaio_setup_listen(struct thread_data *td, short port)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct ibv_recv_wr *bad_wr;
+ int state = td->runstate;
+
+ td_set_runstate(td, TD_SETTING_UP);
rd->addr.sin_family = AF_INET;
rd->addr.sin_addr.s_addr = htonl(INADDR_ANY);
/* rdma_listen */
if (rdma_bind_addr(rd->cm_id, (struct sockaddr *)&rd->addr) != 0) {
- log_err("fio: rdma_bind_addr fail\n");
+ log_err("fio: rdma_bind_addr fail: %m\n");
return 1;
}
if (rdma_listen(rd->cm_id, 3) != 0) {
- log_err("fio: rdma_listen fail\n");
+ log_err("fio: rdma_listen fail: %m\n");
return 1;
}
+ log_info("fio: waiting for connection\n");
+
/* wait for CONNECT_REQUEST */
if (get_next_channel_event
(td, rd->cm_channel, RDMA_CM_EVENT_CONNECT_REQUEST) != 0) {
/* post recv buf */
if (ibv_post_recv(rd->qp, &rd->rq_wr, &bad_wr) != 0) {
- log_err("fio: ibv_post_recv fail\n");
+ log_err("fio: ibv_post_recv fail: %m\n");
return 1;
}
+ td_set_runstate(td, state);
return 0;
}
static int fio_rdmaio_init(struct thread_data *td)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
struct rdmaio_options *o = td->eo;
unsigned int max_bs;
int ret, i;
rd->cm_channel = rdma_create_event_channel();
if (!rd->cm_channel) {
- log_err("fio: rdma_create_event_channel fail\n");
+ log_err("fio: rdma_create_event_channel fail: %m\n");
return 1;
}
ret = rdma_create_id(rd->cm_channel, &rd->cm_id, rd, RDMA_PS_TCP);
if (ret) {
- log_err("fio: rdma_create_id fail\n");
+ log_err("fio: rdma_create_id fail: %m\n");
return 1;
}
if (td_read(td)) { /* READ as the server */
rd->is_client = 0;
+ td->flags |= TD_F_NO_PROGRESS;
/* server rd->rdma_buf_len will be set up after the request is received */
ret = fio_rdmaio_setup_listen(td, o->port);
} else { /* WRITE as the client */
}
max_bs = max(td->o.max_bs[DDIR_READ], td->o.max_bs[DDIR_WRITE]);
+ rd->send_buf.max_bs = htonl(max_bs);
+
/* register each io_u in the free list */
for (i = 0; i < td->io_u_freelist.nr; i++) {
struct io_u *io_u = td->io_u_freelist.io_us[i];
IBV_ACCESS_REMOTE_READ |
IBV_ACCESS_REMOTE_WRITE);
if (io_u->mr == NULL) {
- log_err("fio: ibv_reg_mr io_u failed\n");
+ log_err("fio: ibv_reg_mr io_u failed: %m\n");
return 1;
}
rd->send_buf.rmt_us[i].buf =
- htonll((uint64_t) (unsigned long)io_u->buf);
+ cpu_to_be64((uint64_t) (unsigned long)io_u->buf);
rd->send_buf.rmt_us[i].rkey = htonl(io_u->mr->rkey);
rd->send_buf.rmt_us[i].size = htonl(max_bs);
static void fio_rdmaio_cleanup(struct thread_data *td)
{
- struct rdmaio_data *rd = td->io_ops->data;
+ struct rdmaio_data *rd = td->io_ops_data;
if (rd)
free(rd);
td->o.open_files++;
}
- if (!td->io_ops->data) {
+ if (!td->io_ops_data) {
rd = malloc(sizeof(*rd));
memset(rd, 0, sizeof(*rd));
init_rand_seed(&rd->rand_state, (unsigned int) GOLDEN_RATIO_PRIME, 0);
- td->io_ops->data = rd;
+ td->io_ops_data = rd;
}
return 0;