cifs: Add a function to build an RDMA SGE list from an iterator
authorDavid Howells <dhowells@redhat.com>
Thu, 27 Oct 2022 16:36:11 +0000 (17:36 +0100)
committerSteve French <stfrench@microsoft.com>
Mon, 20 Feb 2023 23:25:44 +0000 (17:25 -0600)
Add a function to add elements onto an RDMA SGE list representing page
fragments extracted from a BVEC-, KVEC- or XARRAY-type iterator and DMA
mapped until the maximum number of elements is reached.

Nothing is done to make sure the pages remain present - that must be done
by the caller.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Steve French <sfrench@samba.org>
cc: Shyam Prasad N <nspmangalore@gmail.com>
cc: Rohith Surabattula <rohiths.msft@gmail.com>
cc: Tom Talpey <tom@talpey.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: linux-cifs@vger.kernel.org
cc: linux-fsdevel@vger.kernel.org
cc: linux-rdma@vger.kernel.org

Link: https://lore.kernel.org/r/166697256704.61150.17388516338310645808.stgit@warthog.procyon.org.uk/
Link: https://lore.kernel.org/r/166732028840.3186319.8512284239779728860.stgit@warthog.procyon.org.uk/
Signed-off-by: Steve French <stfrench@microsoft.com>
fs/cifs/smbdirect.c

index cf923f211c512c62305fcaee200a43ebb33bb443..179f9d79220c84969c1f8655875bb4a85590bf70 100644 (file)
@@ -44,6 +44,17 @@ static int smbd_post_send_page(struct smbd_connection *info,
 static void destroy_mr_list(struct smbd_connection *info);
 static int allocate_mr_list(struct smbd_connection *info);
 
+struct smb_extract_to_rdma {
+       struct ib_sge           *sge;
+       unsigned int            nr_sge;
+       unsigned int            max_sge;
+       struct ib_device        *device;
+       u32                     local_dma_lkey;
+       enum dma_data_direction direction;
+};
+static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
+                                       struct smb_extract_to_rdma *rdma);
+
 /* SMBD version number */
 #define SMBD_V1        0x0100
 
@@ -2492,3 +2503,206 @@ done:
 
        return rc;
 }
+
+static bool smb_set_sge(struct smb_extract_to_rdma *rdma,
+                       struct page *lowest_page, size_t off, size_t len)
+{
+       struct ib_sge *sge = &rdma->sge[rdma->nr_sge];
+       u64 addr;
+
+       addr = ib_dma_map_page(rdma->device, lowest_page,
+                              off, len, rdma->direction);
+       if (ib_dma_mapping_error(rdma->device, addr))
+               return false;
+
+       sge->addr   = addr;
+       sge->length = len;
+       sge->lkey   = rdma->local_dma_lkey;
+       rdma->nr_sge++;
+       return true;
+}
+
+/*
+ * Extract page fragments from a BVEC-class iterator and add them to an RDMA
+ * element list.  The pages are not pinned.
+ */
+static ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter,
+                                       struct smb_extract_to_rdma *rdma,
+                                       ssize_t maxsize)
+{
+       const struct bio_vec *bv = iter->bvec;
+       unsigned long start = iter->iov_offset;
+       unsigned int i;
+       ssize_t ret = 0;
+
+       for (i = 0; i < iter->nr_segs; i++) {
+               size_t off, len;
+
+               len = bv[i].bv_len;
+               if (start >= len) {
+                       start -= len;
+                       continue;
+               }
+
+               len = min_t(size_t, maxsize, len - start);
+               off = bv[i].bv_offset + start;
+
+               if (!smb_set_sge(rdma, bv[i].bv_page, off, len))
+                       return -EIO;
+
+               ret += len;
+               maxsize -= len;
+               if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
+                       break;
+               start = 0;
+       }
+
+       return ret;
+}
+
+/*
+ * Extract fragments from a KVEC-class iterator and add them to an RDMA list.
+ * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers.
+ * The pages are not pinned.
+ */
+static ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter,
+                                       struct smb_extract_to_rdma *rdma,
+                                       ssize_t maxsize)
+{
+       const struct kvec *kv = iter->kvec;
+       unsigned long start = iter->iov_offset;
+       unsigned int i;
+       ssize_t ret = 0;
+
+       for (i = 0; i < iter->nr_segs; i++) {
+               struct page *page;
+               unsigned long kaddr;
+               size_t off, len, seg;
+
+               len = kv[i].iov_len;
+               if (start >= len) {
+                       start -= len;
+                       continue;
+               }
+
+               kaddr = (unsigned long)kv[i].iov_base + start;
+               off = kaddr & ~PAGE_MASK;
+               len = min_t(size_t, maxsize, len - start);
+               kaddr &= PAGE_MASK;
+
+               maxsize -= len;
+               do {
+                       seg = min_t(size_t, len, PAGE_SIZE - off);
+
+                       if (is_vmalloc_or_module_addr((void *)kaddr))
+                               page = vmalloc_to_page((void *)kaddr);
+                       else
+                               page = virt_to_page(kaddr);
+
+                       if (!smb_set_sge(rdma, page, off, seg))
+                               return -EIO;
+
+                       ret += seg;
+                       len -= seg;
+                       kaddr += PAGE_SIZE;
+                       off = 0;
+               } while (len > 0 && rdma->nr_sge < rdma->max_sge);
+
+               if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
+                       break;
+               start = 0;
+       }
+
+       return ret;
+}
+
+/*
+ * Extract folio fragments from an XARRAY-class iterator and add them to an
+ * RDMA list.  The folios are not pinned.
+ */
+static ssize_t smb_extract_xarray_to_rdma(struct iov_iter *iter,
+                                         struct smb_extract_to_rdma *rdma,
+                                         ssize_t maxsize)
+{
+       struct xarray *xa = iter->xarray;
+       struct folio *folio;
+       loff_t start = iter->xarray_start + iter->iov_offset;
+       pgoff_t index = start / PAGE_SIZE;
+       ssize_t ret = 0;
+       size_t off, len;
+       XA_STATE(xas, xa, index);
+
+       rcu_read_lock();
+
+       xas_for_each(&xas, folio, ULONG_MAX) {
+               if (xas_retry(&xas, folio))
+                       continue;
+               if (WARN_ON(xa_is_value(folio)))
+                       break;
+               if (WARN_ON(folio_test_hugetlb(folio)))
+                       break;
+
+               off = offset_in_folio(folio, start);
+               len = min_t(size_t, maxsize, folio_size(folio) - off);
+
+               if (!smb_set_sge(rdma, folio_page(folio, 0), off, len)) {
+                       rcu_read_unlock();
+                       return -EIO;
+               }
+
+               maxsize -= len;
+               ret += len;
+               if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
+                       break;
+       }
+
+       rcu_read_unlock();
+       return ret;
+}
+
+/*
+ * Extract page fragments from up to the given amount of the source iterator
+ * and build up an RDMA list that refers to all of those bits.  The RDMA list
+ * is appended to, up to the maximum number of elements set in the parameter
+ * block.
+ *
+ * The extracted page fragments are not pinned or ref'd in any way; if an
+ * IOVEC/UBUF-type iterator is to be used, it should be converted to a
+ * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some
+ * way.
+ */
+static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
+                                       struct smb_extract_to_rdma *rdma)
+{
+       ssize_t ret;
+       int before = rdma->nr_sge;
+
+       switch (iov_iter_type(iter)) {
+       case ITER_BVEC:
+               ret = smb_extract_bvec_to_rdma(iter, rdma, len);
+               break;
+       case ITER_KVEC:
+               ret = smb_extract_kvec_to_rdma(iter, rdma, len);
+               break;
+       case ITER_XARRAY:
+               ret = smb_extract_xarray_to_rdma(iter, rdma, len);
+               break;
+       default:
+               WARN_ON_ONCE(1);
+               return -EIO;
+       }
+
+       if (ret > 0) {
+               iov_iter_advance(iter, ret);
+       } else if (ret < 0) {
+               while (rdma->nr_sge > before) {
+                       struct ib_sge *sge = &rdma->sge[rdma->nr_sge--];
+
+                       ib_dma_unmap_single(rdma->device, sge->addr, sge->length,
+                                           rdma->direction);
+                       sge->addr = 0;
+               }
+       }
+
+       return ret;
+}