From a6becc33deec29a715c3326c2286082cdc60a197 Mon Sep 17 00:00:00 2001 From: "Wang, Long" Date: Tue, 25 Jan 2022 10:18:14 +0100 Subject: [PATCH] rpma: add support for File System DAX File System DAX is handled in a different way than Device DAX: 1) In case of File System DAX, each thread uses a separate file from this file system and no offset is needed. In case of Device DAX, each thread uses a separate offset within the same Device DAX. 2) File System DAX requires rpma_mr_advise(3)(ibv_advise_mr(3)) to be called for the registered memory to avoid page faults and degraded performance. Ref: https://github.com/axboe/fio/issues/1238 Signed-off-by: Wang, Long --- engines/librpma_fio.c | 44 ++++++++++++++++++++++++++++++++----------- engines/librpma_fio.h | 2 +- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/engines/librpma_fio.c b/engines/librpma_fio.c index 3d605ed6..9d6ebf38 100644 --- a/engines/librpma_fio.c +++ b/engines/librpma_fio.c @@ -108,7 +108,7 @@ char *librpma_fio_allocate_dram(struct thread_data *td, size_t size, return mem_ptr; } -char *librpma_fio_allocate_pmem(struct thread_data *td, const char *filename, +char *librpma_fio_allocate_pmem(struct thread_data *td, struct fio_file *f, size_t size, struct librpma_fio_mem *mem) { size_t size_mmap = 0; @@ -122,18 +122,24 @@ char *librpma_fio_allocate_pmem(struct thread_data *td, const char *filename, return NULL; } - ws_offset = (td->thread_number - 1) * size; + if (f->filetype == FIO_TYPE_CHAR) { + /* Each thread uses a separate offset within DeviceDAX. */ + ws_offset = (td->thread_number - 1) * size; + } else { + /* Each thread uses a separate FileSystemDAX file. No offset is needed. */ + ws_offset = 0; + } - if (!filename) { + if (!f->file_name) { log_err("fio: filename is not set\n"); return NULL; } /* map the file */ - mem_ptr = pmem_map_file(filename, 0 /* len */, 0 /* flags */, + mem_ptr = pmem_map_file(f->file_name, 0 /* len */, 0 /* flags */, 0 /* mode */, &size_mmap, &is_pmem); if (mem_ptr == NULL) { - log_err("fio: pmem_map_file(%s) failed\n", filename); + log_err("fio: pmem_map_file(%s) failed\n", f->file_name); /* pmem_map_file() sets errno on failure */ td_verror(td, errno, "pmem_map_file"); return NULL; @@ -142,7 +148,7 @@ char *librpma_fio_allocate_pmem(struct thread_data *td, const char *filename, /* pmem is expected */ if (!is_pmem) { log_err("fio: %s is not located in persistent memory\n", - filename); + f->file_name); goto err_unmap; } @@ -150,12 +156,12 @@ char *librpma_fio_allocate_pmem(struct thread_data *td, const char *filename, if (size_mmap < ws_offset + size) { log_err( "fio: %s is too small to handle so many threads (%zu < %zu)\n", - filename, size_mmap, ws_offset + size); + f->file_name, size_mmap, ws_offset + size); goto err_unmap; } log_info("fio: size of memory mapped from the file %s: %zu\n", - filename, size_mmap); + f->file_name, size_mmap); mem->mem_ptr = mem_ptr; mem->size_mmap = size_mmap; @@ -893,6 +899,7 @@ int librpma_fio_server_open_file(struct thread_data *td, struct fio_file *f, size_t mem_size = td->o.size; size_t mr_desc_size; void *ws_ptr; + bool is_dram; int usage_mem_type; int ret; @@ -910,14 +917,14 @@ int librpma_fio_server_open_file(struct thread_data *td, struct fio_file *f, return -1; } - if (strcmp(f->file_name, "malloc") == 0) { + is_dram = !strcmp(f->file_name, "malloc"); + if (is_dram) { /* allocation from DRAM using posix_memalign() */ ws_ptr = librpma_fio_allocate_dram(td, mem_size, &csd->mem); usage_mem_type = RPMA_MR_USAGE_FLUSH_TYPE_VISIBILITY; } else { /* allocation from PMEM using pmem_map_file() */ - ws_ptr = librpma_fio_allocate_pmem(td, f->file_name, - mem_size, &csd->mem); + ws_ptr = librpma_fio_allocate_pmem(td, f, mem_size, &csd->mem); usage_mem_type = RPMA_MR_USAGE_FLUSH_TYPE_PERSISTENT; } @@ -934,6 +941,21 @@ int librpma_fio_server_open_file(struct thread_data *td, struct fio_file *f, goto err_free; } + if (!is_dram && f->filetype == FIO_TYPE_FILE) { + ret = rpma_mr_advise(mr, 0, mem_size, + IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE, + IBV_ADVISE_MR_FLAG_FLUSH); + if (ret) { + librpma_td_verror(td, ret, "rpma_mr_advise"); + /* an invalid argument is an error */ + if (ret == RPMA_E_INVAL) + goto err_mr_dereg; + + /* log_err used instead of log_info to avoid corruption of the JSON output */ + log_err("Note: having rpma_mr_advise(3) failed because of RPMA_E_NOSUPP or RPMA_E_PROVIDER may come with a performance penalty, but it is not a blocker for running the benchmark.\n"); + } + } + /* get size of the memory region's descriptor */ if ((ret = rpma_mr_get_descriptor_size(mr, &mr_desc_size))) { librpma_td_verror(td, ret, "rpma_mr_get_descriptor_size"); diff --git a/engines/librpma_fio.h b/engines/librpma_fio.h index fb89d99d..2c507e9c 100644 --- a/engines/librpma_fio.h +++ b/engines/librpma_fio.h @@ -77,7 +77,7 @@ struct librpma_fio_mem { char *librpma_fio_allocate_dram(struct thread_data *td, size_t size, struct librpma_fio_mem *mem); -char *librpma_fio_allocate_pmem(struct thread_data *td, const char *filename, +char *librpma_fio_allocate_pmem(struct thread_data *td, struct fio_file *f, size_t size, struct librpma_fio_mem *mem); void librpma_fio_free(struct librpma_fio_mem *mem); -- 2.25.1