ksmbd: add smbd max io size parameter
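This change makes the maximum SMB Direct read/write I/O size configurable: a new init_smbd_max_io_size() helper clamps the requested value to [SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE] and stores it in smb_direct_max_read_write_size, whose default is now SMBD_DEFAULT_IOSIZE.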
fs/ksmbd/transport_rdma.c
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2017, Microsoft Corporation.
4 * Copyright (C) 2018, LG Electronics.
5 *
6 * Author(s): Long Li <longli@microsoft.com>,
7 * Hyunchul Lee <hyc.lee@gmail.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 * the GNU General Public License for more details.
18 */
19
20#define SUBMOD_NAME "smb_direct"
21
22#include <linux/kthread.h>
23#include <linux/list.h>
24#include <linux/mempool.h>
25#include <linux/highmem.h>
26#include <linux/scatterlist.h>
27#include <rdma/ib_verbs.h>
28#include <rdma/rdma_cm.h>
29#include <rdma/rw.h>
30
31#include "glob.h"
32#include "connection.h"
33#include "smb_common.h"
34#include "smbstatus.h"
35#include "transport_rdma.h"
36
37#define SMB_DIRECT_PORT_IWARP 5445
38#define SMB_DIRECT_PORT_INFINIBAND 445
39
40#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)
41
42/* SMB_DIRECT negotiation timeout in seconds */
43#define SMB_DIRECT_NEGOTIATE_TIMEOUT 120
44
45#define SMB_DIRECT_MAX_SEND_SGES 8
46#define SMB_DIRECT_MAX_RECV_SGES 1
47
48/*
49 * Default maximum number of RDMA read/write outstanding on this connection
50 * This value may be decreased during QP creation if the hardware limit is lower
51 */
52#define SMB_DIRECT_CM_INITIATOR_DEPTH 8
53
54/* Maximum number of retries on data transfer operations */
55#define SMB_DIRECT_CM_RETRY 6
56/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
57#define SMB_DIRECT_CM_RNR_RETRY 0
58
59/*
60 * User configurable initial values per SMB_DIRECT transport connection
61 * as defined in [MS-SMBD] 3.1.1.1
62 * Those may change after a SMB_DIRECT negotiation
63 */
64
65/* Set 445 port to SMB Direct port by default */
66static int smb_direct_port = SMB_DIRECT_PORT_INFINIBAND;
67
68/* The local peer's maximum number of credits to grant to the peer */
69static int smb_direct_receive_credit_max = 255;
70
71/* The remote peer's credit request of local peer */
72static int smb_direct_send_credit_target = 255;
73
74/* The maximum single message size that can be sent to the remote peer */
75static int smb_direct_max_send_size = 8192;
76
77/* The maximum fragmented upper-layer payload receive size supported */
78static int smb_direct_max_fragmented_recv_size = 1024 * 1024;
79
80/* The maximum single-message size which can be received */
81static int smb_direct_max_receive_size = 8192;
82
83static int smb_direct_max_read_write_size = SMBD_DEFAULT_IOSIZE;
84
85static LIST_HEAD(smb_direct_device_list);
86static DEFINE_RWLOCK(smb_direct_device_lock);
87
88struct smb_direct_device {
89 struct ib_device *ib_dev;
90 struct list_head list;
91};
92
93static struct smb_direct_listener {
94 struct rdma_cm_id *cm_id;
95} smb_direct_listener;
96
97static struct workqueue_struct *smb_direct_wq;
98
99enum smb_direct_status {
100 SMB_DIRECT_CS_NEW = 0,
101 SMB_DIRECT_CS_CONNECTED,
102 SMB_DIRECT_CS_DISCONNECTING,
103 SMB_DIRECT_CS_DISCONNECTED,
104};
105
106struct smb_direct_transport {
107 struct ksmbd_transport transport;
108
109 enum smb_direct_status status;
110 bool full_packet_received;
111 wait_queue_head_t wait_status;
112
113 struct rdma_cm_id *cm_id;
114 struct ib_cq *send_cq;
115 struct ib_cq *recv_cq;
116 struct ib_pd *pd;
117 struct ib_qp *qp;
118
119 int max_send_size;
120 int max_recv_size;
121 int max_fragmented_send_size;
122 int max_fragmented_recv_size;
123 int max_rdma_rw_size;
124
125 spinlock_t reassembly_queue_lock;
126 struct list_head reassembly_queue;
127 int reassembly_data_length;
128 int reassembly_queue_length;
129 int first_entry_offset;
130 wait_queue_head_t wait_reassembly_queue;
131
132 spinlock_t receive_credit_lock;
133 int recv_credits;
134 int count_avail_recvmsg;
135 int recv_credit_max;
136 int recv_credit_target;
137
138 spinlock_t recvmsg_queue_lock;
139 struct list_head recvmsg_queue;
140
141 spinlock_t empty_recvmsg_queue_lock;
142 struct list_head empty_recvmsg_queue;
143
144 int send_credit_target;
145 atomic_t send_credits;
146 spinlock_t lock_new_recv_credits;
147 int new_recv_credits;
148 int max_rw_credits;
149 int pages_per_rw_credit;
150 atomic_t rw_credits;
151
152 wait_queue_head_t wait_send_credits;
153 wait_queue_head_t wait_rw_credits;
154
155 mempool_t *sendmsg_mempool;
156 struct kmem_cache *sendmsg_cache;
157 mempool_t *recvmsg_mempool;
158 struct kmem_cache *recvmsg_cache;
159
160 wait_queue_head_t wait_send_pending;
161 atomic_t send_pending;
162
163 struct delayed_work post_recv_credits_work;
164 struct work_struct send_immediate_work;
165 struct work_struct disconnect_work;
166
167 bool negotiation_requested;
168};
169
170#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
171
172enum {
173 SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
174 SMB_DIRECT_MSG_DATA_TRANSFER
175};
176
177static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
178
179struct smb_direct_send_ctx {
180 struct list_head msg_list;
181 int wr_cnt;
182 bool need_invalidate_rkey;
183 unsigned int remote_key;
184};
185
186struct smb_direct_sendmsg {
187 struct smb_direct_transport *transport;
188 struct ib_send_wr wr;
189 struct list_head list;
190 int num_sge;
191 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES];
192 struct ib_cqe cqe;
193 u8 packet[];
194};
195
196struct smb_direct_recvmsg {
197 struct smb_direct_transport *transport;
198 struct list_head list;
199 int type;
200 struct ib_sge sge;
201 struct ib_cqe cqe;
202 bool first_segment;
203 u8 packet[];
204};
205
206struct smb_direct_rdma_rw_msg {
207 struct smb_direct_transport *t;
208 struct ib_cqe cqe;
209 int status;
210 struct completion *completion;
211 struct list_head list;
212 struct rdma_rw_ctx rw_ctx;
213 struct sg_table sgt;
214 struct scatterlist sg_list[];
215};
216
217void init_smbd_max_io_size(unsigned int sz)
218{
219 sz = clamp_val(sz, SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE);
220 smb_direct_max_read_write_size = sz;
221}
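/* Usage sketch (the caller lives outside this file; names are
 * illustrative): the server startup path is expected to forward the
 * user-configured byte count, e.g. init_smbd_max_io_size(conf_value);
 * anything outside [SMBD_MIN_IOSIZE, SMBD_MAX_IOSIZE] is clamped.
 */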
222
223static inline int get_buf_page_count(void *buf, int size)
224{
225 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
226 (uintptr_t)buf / PAGE_SIZE;
227}
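/* Example: with 4 KiB pages, a 100-byte buffer starting 8 bytes before a
 * page boundary spans two pages, so get_buf_page_count() returns 2 even
 * though size < PAGE_SIZE. RDMA R/W credit accounting is therefore done
 * in pages rather than bytes (see calc_rw_credits()).
 */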
228
229static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
230static void smb_direct_post_recv_credits(struct work_struct *work);
231static int smb_direct_post_send_data(struct smb_direct_transport *t,
232 struct smb_direct_send_ctx *send_ctx,
233 struct kvec *iov, int niov,
234 int remaining_data_length);
235
236static inline struct smb_direct_transport *
237smb_trans_direct_transfort(struct ksmbd_transport *t)
238{
239 return container_of(t, struct smb_direct_transport, transport);
240}
241
242static inline void
243*smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
244{
245 return (void *)recvmsg->packet;
246}
247
248static inline bool is_receive_credit_post_required(int receive_credits,
249 int avail_recvmsg_count)
250{
251 return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
252 avail_recvmsg_count >= (receive_credits >> 2);
253}
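/* Heuristic: repost receive buffers once the granted credits fall to at
 * most 1/8 of smb_direct_receive_credit_max and the free recvmsg buffers
 * amount to at least a quarter of the credits still outstanding.
 */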
254
255static struct
256smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
257{
258 struct smb_direct_recvmsg *recvmsg = NULL;
259
260 spin_lock(&t->recvmsg_queue_lock);
261 if (!list_empty(&t->recvmsg_queue)) {
262 recvmsg = list_first_entry(&t->recvmsg_queue,
263 struct smb_direct_recvmsg,
264 list);
265 list_del(&recvmsg->list);
266 }
267 spin_unlock(&t->recvmsg_queue_lock);
268 return recvmsg;
269}
270
271static void put_recvmsg(struct smb_direct_transport *t,
272 struct smb_direct_recvmsg *recvmsg)
273{
274 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
275 recvmsg->sge.length, DMA_FROM_DEVICE);
276
277 spin_lock(&t->recvmsg_queue_lock);
278 list_add(&recvmsg->list, &t->recvmsg_queue);
279 spin_unlock(&t->recvmsg_queue_lock);
280}
281
282static struct
283smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
284{
285 struct smb_direct_recvmsg *recvmsg = NULL;
286
287 spin_lock(&t->empty_recvmsg_queue_lock);
288 if (!list_empty(&t->empty_recvmsg_queue)) {
289 recvmsg = list_first_entry(&t->empty_recvmsg_queue,
290 struct smb_direct_recvmsg, list);
291 list_del(&recvmsg->list);
292 }
293 spin_unlock(&t->empty_recvmsg_queue_lock);
294 return recvmsg;
295}
296
297static void put_empty_recvmsg(struct smb_direct_transport *t,
298 struct smb_direct_recvmsg *recvmsg)
299{
300 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
301 recvmsg->sge.length, DMA_FROM_DEVICE);
302
303 spin_lock(&t->empty_recvmsg_queue_lock);
304 list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
305 spin_unlock(&t->empty_recvmsg_queue_lock);
306}
307
308static void enqueue_reassembly(struct smb_direct_transport *t,
309 struct smb_direct_recvmsg *recvmsg,
310 int data_length)
311{
312 spin_lock(&t->reassembly_queue_lock);
313 list_add_tail(&recvmsg->list, &t->reassembly_queue);
314 t->reassembly_queue_length++;
315 /*
316 * Make sure reassembly_data_length is updated after list and
317 * reassembly_queue_length are updated. On the dequeue side
318 * reassembly_data_length is checked without a lock to determine
319 * if reassembly_queue_length and list is up to date
320 */
321 virt_wmb();
322 t->reassembly_data_length += data_length;
323 spin_unlock(&t->reassembly_queue_lock);
324}
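/* The virt_wmb() above pairs with the virt_rmb() in smb_direct_read():
 * the reader samples reassembly_data_length without the lock, so the list
 * and reassembly_queue_length must be visible before the length grows.
 */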
325
326static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
327{
328 if (!list_empty(&t->reassembly_queue))
329 return list_first_entry(&t->reassembly_queue,
330 struct smb_direct_recvmsg, list);
331 else
332 return NULL;
333}
334
335static void smb_direct_disconnect_rdma_work(struct work_struct *work)
336{
337 struct smb_direct_transport *t =
338 container_of(work, struct smb_direct_transport,
339 disconnect_work);
340
341 if (t->status == SMB_DIRECT_CS_CONNECTED) {
342 t->status = SMB_DIRECT_CS_DISCONNECTING;
343 rdma_disconnect(t->cm_id);
344 }
345}
346
347static void
348smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
349{
350 if (t->status == SMB_DIRECT_CS_CONNECTED)
351 queue_work(smb_direct_wq, &t->disconnect_work);
352}
353
354static void smb_direct_send_immediate_work(struct work_struct *work)
355{
356 struct smb_direct_transport *t = container_of(work,
357 struct smb_direct_transport, send_immediate_work);
358
359 if (t->status != SMB_DIRECT_CS_CONNECTED)
360 return;
361
362 smb_direct_post_send_data(t, NULL, NULL, 0, 0);
363}
364
365static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
366{
367 struct smb_direct_transport *t;
368 struct ksmbd_conn *conn;
369
370 t = kzalloc(sizeof(*t), GFP_KERNEL);
371 if (!t)
372 return NULL;
373
374 t->cm_id = cm_id;
375 cm_id->context = t;
376
377 t->status = SMB_DIRECT_CS_NEW;
378 init_waitqueue_head(&t->wait_status);
379
380 spin_lock_init(&t->reassembly_queue_lock);
381 INIT_LIST_HEAD(&t->reassembly_queue);
382 t->reassembly_data_length = 0;
383 t->reassembly_queue_length = 0;
384 init_waitqueue_head(&t->wait_reassembly_queue);
385 init_waitqueue_head(&t->wait_send_credits);
386 init_waitqueue_head(&t->wait_rw_credits);
387
388 spin_lock_init(&t->receive_credit_lock);
389 spin_lock_init(&t->recvmsg_queue_lock);
390 INIT_LIST_HEAD(&t->recvmsg_queue);
391
392 spin_lock_init(&t->empty_recvmsg_queue_lock);
393 INIT_LIST_HEAD(&t->empty_recvmsg_queue);
394
395 init_waitqueue_head(&t->wait_send_pending);
396 atomic_set(&t->send_pending, 0);
397
398 spin_lock_init(&t->lock_new_recv_credits);
399
400 INIT_DELAYED_WORK(&t->post_recv_credits_work,
401 smb_direct_post_recv_credits);
402 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
403 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
404
405 conn = ksmbd_conn_alloc();
406 if (!conn)
407 goto err;
408 conn->transport = KSMBD_TRANS(t);
409 KSMBD_TRANS(t)->conn = conn;
410 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
411 return t;
412err:
413 kfree(t);
414 return NULL;
415}
416
417static void free_transport(struct smb_direct_transport *t)
418{
419 struct smb_direct_recvmsg *recvmsg;
420
421 wake_up_interruptible(&t->wait_send_credits);
422
423 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
424 wait_event(t->wait_send_pending,
425 atomic_read(&t->send_pending) == 0);
426
427 cancel_work_sync(&t->disconnect_work);
428 cancel_delayed_work_sync(&t->post_recv_credits_work);
429 cancel_work_sync(&t->send_immediate_work);
430
431 if (t->qp) {
432 ib_drain_qp(t->qp);
433 ib_mr_pool_destroy(t->qp, &t->qp->rdma_mrs);
434 ib_destroy_qp(t->qp);
435 }
436
437 ksmbd_debug(RDMA, "drain the reassembly queue\n");
438 do {
439 spin_lock(&t->reassembly_queue_lock);
440 recvmsg = get_first_reassembly(t);
441 if (recvmsg) {
442 list_del(&recvmsg->list);
443 spin_unlock(&t->reassembly_queue_lock);
444 put_recvmsg(t, recvmsg);
445 } else {
446 spin_unlock(&t->reassembly_queue_lock);
447 }
448 } while (recvmsg);
449 t->reassembly_data_length = 0;
450
451 if (t->send_cq)
452 ib_free_cq(t->send_cq);
453 if (t->recv_cq)
454 ib_free_cq(t->recv_cq);
455 if (t->pd)
456 ib_dealloc_pd(t->pd);
457 if (t->cm_id)
458 rdma_destroy_id(t->cm_id);
459
460 smb_direct_destroy_pools(t);
461 ksmbd_conn_free(KSMBD_TRANS(t)->conn);
462 kfree(t);
463}
464
465static struct smb_direct_sendmsg
466*smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
467{
468 struct smb_direct_sendmsg *msg;
469
470 msg = mempool_alloc(t->sendmsg_mempool, GFP_KERNEL);
471 if (!msg)
472 return ERR_PTR(-ENOMEM);
473 msg->transport = t;
474 INIT_LIST_HEAD(&msg->list);
475 msg->num_sge = 0;
476 return msg;
477}
478
479static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
480 struct smb_direct_sendmsg *msg)
481{
482 int i;
483
484 if (msg->num_sge > 0) {
485 ib_dma_unmap_single(t->cm_id->device,
486 msg->sge[0].addr, msg->sge[0].length,
487 DMA_TO_DEVICE);
488 for (i = 1; i < msg->num_sge; i++)
489 ib_dma_unmap_page(t->cm_id->device,
490 msg->sge[i].addr, msg->sge[i].length,
491 DMA_TO_DEVICE);
492 }
493 mempool_free(msg, t->sendmsg_mempool);
494}
495
496static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
497{
498 switch (recvmsg->type) {
499 case SMB_DIRECT_MSG_DATA_TRANSFER: {
500 struct smb_direct_data_transfer *req =
501 (struct smb_direct_data_transfer *)recvmsg->packet;
502 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
503 + le32_to_cpu(req->data_offset));
504 ksmbd_debug(RDMA,
505 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
506 le16_to_cpu(req->credits_granted),
507 le16_to_cpu(req->credits_requested),
508 req->data_length, req->remaining_data_length,
509 hdr->ProtocolId, hdr->Command);
510 break;
511 }
512 case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
513 struct smb_direct_negotiate_req *req =
514 (struct smb_direct_negotiate_req *)recvmsg->packet;
515 ksmbd_debug(RDMA,
516 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
517 le16_to_cpu(req->min_version),
518 le16_to_cpu(req->max_version),
519 le16_to_cpu(req->credits_requested),
520 le32_to_cpu(req->preferred_send_size),
521 le32_to_cpu(req->max_receive_size),
522 le32_to_cpu(req->max_fragmented_size));
523 if (le16_to_cpu(req->min_version) > 0x0100 ||
524 le16_to_cpu(req->max_version) < 0x0100)
525 return -EOPNOTSUPP;
526 if (le16_to_cpu(req->credits_requested) <= 0 ||
527 le32_to_cpu(req->max_receive_size) <= 128 ||
528 le32_to_cpu(req->max_fragmented_size) <=
529 128 * 1024)
530 return -ECONNABORTED;
531
532 break;
533 }
534 default:
535 return -EINVAL;
536 }
537 return 0;
538}
539
540static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
541{
542 struct smb_direct_recvmsg *recvmsg;
543 struct smb_direct_transport *t;
544
545 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
546 t = recvmsg->transport;
547
548 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
549 if (wc->status != IB_WC_WR_FLUSH_ERR) {
550 pr_err("Recv error. status='%s (%d)' opcode=%d\n",
551 ib_wc_status_msg(wc->status), wc->status,
552 wc->opcode);
553 smb_direct_disconnect_rdma_connection(t);
554 }
555 put_empty_recvmsg(t, recvmsg);
556 return;
557 }
558
559 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
560 ib_wc_status_msg(wc->status), wc->status,
561 wc->opcode);
562
563 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
564 recvmsg->sge.length, DMA_FROM_DEVICE);
565
566 switch (recvmsg->type) {
567 case SMB_DIRECT_MSG_NEGOTIATE_REQ:
568 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
569 put_empty_recvmsg(t, recvmsg);
570 return;
571 }
572 t->negotiation_requested = true;
573 t->full_packet_received = true;
574 enqueue_reassembly(t, recvmsg, 0);
575 wake_up_interruptible(&t->wait_status);
576 break;
577 case SMB_DIRECT_MSG_DATA_TRANSFER: {
578 struct smb_direct_data_transfer *data_transfer =
579 (struct smb_direct_data_transfer *)recvmsg->packet;
580 unsigned int data_length;
581 int avail_recvmsg_count, receive_credits;
582
583 if (wc->byte_len <
584 offsetof(struct smb_direct_data_transfer, padding)) {
585 put_empty_recvmsg(t, recvmsg);
586 return;
587 }
588
589 data_length = le32_to_cpu(data_transfer->data_length);
590 if (data_length) {
591 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
592 (u64)data_length) {
593 put_empty_recvmsg(t, recvmsg);
594 return;
595 }
596
597 if (t->full_packet_received)
598 recvmsg->first_segment = true;
599
600 if (le32_to_cpu(data_transfer->remaining_data_length))
601 t->full_packet_received = false;
602 else
603 t->full_packet_received = true;
604
605 enqueue_reassembly(t, recvmsg, (int)data_length);
606 wake_up_interruptible(&t->wait_reassembly_queue);
607
608 spin_lock(&t->receive_credit_lock);
609 receive_credits = --(t->recv_credits);
610 avail_recvmsg_count = t->count_avail_recvmsg;
611 spin_unlock(&t->receive_credit_lock);
612 } else {
613 put_empty_recvmsg(t, recvmsg);
614
615 spin_lock(&t->receive_credit_lock);
616 receive_credits = --(t->recv_credits);
617 avail_recvmsg_count = ++(t->count_avail_recvmsg);
618 spin_unlock(&t->receive_credit_lock);
619 }
620
621 t->recv_credit_target =
622 le16_to_cpu(data_transfer->credits_requested);
623 atomic_add(le16_to_cpu(data_transfer->credits_granted),
624 &t->send_credits);
625
626 if (le16_to_cpu(data_transfer->flags) &
627 SMB_DIRECT_RESPONSE_REQUESTED)
628 queue_work(smb_direct_wq, &t->send_immediate_work);
629
630 if (atomic_read(&t->send_credits) > 0)
631 wake_up_interruptible(&t->wait_send_credits);
632
633 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
634 mod_delayed_work(smb_direct_wq,
635 &t->post_recv_credits_work, 0);
636 break;
637 }
638 default:
639 break;
640 }
641}
642
643static int smb_direct_post_recv(struct smb_direct_transport *t,
644 struct smb_direct_recvmsg *recvmsg)
645{
646 struct ib_recv_wr wr;
647 int ret;
648
649 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
650 recvmsg->packet, t->max_recv_size,
651 DMA_FROM_DEVICE);
652 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
653 if (ret)
654 return ret;
655 recvmsg->sge.length = t->max_recv_size;
656 recvmsg->sge.lkey = t->pd->local_dma_lkey;
657 recvmsg->cqe.done = recv_done;
658
659 wr.wr_cqe = &recvmsg->cqe;
660 wr.next = NULL;
661 wr.sg_list = &recvmsg->sge;
662 wr.num_sge = 1;
663
664 ret = ib_post_recv(t->qp, &wr, NULL);
665 if (ret) {
666 pr_err("Can't post recv: %d\n", ret);
667 ib_dma_unmap_single(t->cm_id->device,
668 recvmsg->sge.addr, recvmsg->sge.length,
669 DMA_FROM_DEVICE);
670 smb_direct_disconnect_rdma_connection(t);
671 return ret;
672 }
673 return ret;
674}
675
676static int smb_direct_read(struct ksmbd_transport *t, char *buf,
677 unsigned int size)
678{
679 struct smb_direct_recvmsg *recvmsg;
680 struct smb_direct_data_transfer *data_transfer;
681 int to_copy, to_read, data_read, offset;
682 u32 data_length, remaining_data_length, data_offset;
683 int rc;
684 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
685
686again:
687 if (st->status != SMB_DIRECT_CS_CONNECTED) {
688 pr_err("disconnected\n");
689 return -ENOTCONN;
690 }
691
692 /*
693 * No need to hold the reassembly queue lock all the time as we are
694 * the only one reading from the front of the queue. The transport
695 * may add more entries to the back of the queue at the same time
696 */
697 if (st->reassembly_data_length >= size) {
698 int queue_length;
699 int queue_removed = 0;
700
701 /*
702 * Need to make sure reassembly_data_length is read before
703 * reading reassembly_queue_length and calling
704 * get_first_reassembly. This call is lock free
705 * as we never read at the end of the queue which are being
706 * updated in SOFTIRQ as more data is received
707 */
708 virt_rmb();
709 queue_length = st->reassembly_queue_length;
710 data_read = 0;
711 to_read = size;
712 offset = st->first_entry_offset;
713 while (data_read < size) {
714 recvmsg = get_first_reassembly(st);
715 data_transfer = smb_direct_recvmsg_payload(recvmsg);
716 data_length = le32_to_cpu(data_transfer->data_length);
717 remaining_data_length =
718 le32_to_cpu(data_transfer->remaining_data_length);
719 data_offset = le32_to_cpu(data_transfer->data_offset);
720
721 /*
722 * The upper layer expects RFC1002 length at the
723 * beginning of the payload. Return it to indicate
724 * the total length of the packet. This minimizes the
725 * change to upper layer packet processing logic. This
726 * will eventually be removed when an intermediate
727 * transport layer is added
728 */
729 if (recvmsg->first_segment && size == 4) {
730 unsigned int rfc1002_len =
731 data_length + remaining_data_length;
732 *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
733 data_read = 4;
734 recvmsg->first_segment = false;
735 ksmbd_debug(RDMA,
736 "returning rfc1002 length %d\n",
737 rfc1002_len);
738 goto read_rfc1002_done;
739 }
740
741 to_copy = min_t(int, data_length - offset, to_read);
742 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
743 to_copy);
744
745 /* move on to the next buffer? */
746 if (to_copy == data_length - offset) {
747 queue_length--;
748 /*
749 * No need to lock if we are not at the
750 * end of the queue
751 */
752 if (queue_length) {
753 list_del(&recvmsg->list);
754 } else {
755 spin_lock_irq(&st->reassembly_queue_lock);
756 list_del(&recvmsg->list);
757 spin_unlock_irq(&st->reassembly_queue_lock);
758 }
759 queue_removed++;
760 put_recvmsg(st, recvmsg);
761 offset = 0;
762 } else {
763 offset += to_copy;
764 }
765
766 to_read -= to_copy;
767 data_read += to_copy;
768 }
769
770 spin_lock_irq(&st->reassembly_queue_lock);
771 st->reassembly_data_length -= data_read;
772 st->reassembly_queue_length -= queue_removed;
773 spin_unlock_irq(&st->reassembly_queue_lock);
774
775 spin_lock(&st->receive_credit_lock);
776 st->count_avail_recvmsg += queue_removed;
777 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
778 spin_unlock(&st->receive_credit_lock);
779 mod_delayed_work(smb_direct_wq,
780 &st->post_recv_credits_work, 0);
781 } else {
782 spin_unlock(&st->receive_credit_lock);
783 }
784
785 st->first_entry_offset = offset;
786 ksmbd_debug(RDMA,
787 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
788 data_read, st->reassembly_data_length,
789 st->first_entry_offset);
790read_rfc1002_done:
791 return data_read;
792 }
793
794 ksmbd_debug(RDMA, "wait_event on more data\n");
795 rc = wait_event_interruptible(st->wait_reassembly_queue,
796 st->reassembly_data_length >= size ||
797 st->status != SMB_DIRECT_CS_CONNECTED);
798 if (rc)
799 return -EINTR;
800
801 goto again;
802}
803
804static void smb_direct_post_recv_credits(struct work_struct *work)
805{
806 struct smb_direct_transport *t = container_of(work,
807 struct smb_direct_transport, post_recv_credits_work.work);
808 struct smb_direct_recvmsg *recvmsg;
809 int receive_credits, credits = 0;
810 int ret;
811 int use_free = 1;
812
813 spin_lock(&t->receive_credit_lock);
814 receive_credits = t->recv_credits;
815 spin_unlock(&t->receive_credit_lock);
816
817 if (receive_credits < t->recv_credit_target) {
818 while (true) {
819 if (use_free)
820 recvmsg = get_free_recvmsg(t);
821 else
822 recvmsg = get_empty_recvmsg(t);
823 if (!recvmsg) {
824 if (use_free) {
825 use_free = 0;
826 continue;
827 } else {
828 break;
829 }
830 }
831
832 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
833 recvmsg->first_segment = false;
834
835 ret = smb_direct_post_recv(t, recvmsg);
836 if (ret) {
837 pr_err("Can't post recv: %d\n", ret);
838 put_recvmsg(t, recvmsg);
839 break;
840 }
841 credits++;
842 }
843 }
844
845 spin_lock(&t->receive_credit_lock);
846 t->recv_credits += credits;
847 t->count_avail_recvmsg -= credits;
848 spin_unlock(&t->receive_credit_lock);
849
850 spin_lock(&t->lock_new_recv_credits);
851 t->new_recv_credits += credits;
852 spin_unlock(&t->lock_new_recv_credits);
853
854 if (credits)
855 queue_work(smb_direct_wq, &t->send_immediate_work);
856}
857
858static void send_done(struct ib_cq *cq, struct ib_wc *wc)
859{
860 struct smb_direct_sendmsg *sendmsg, *sibling;
861 struct smb_direct_transport *t;
862 struct list_head *pos, *prev, *end;
863
864 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
865 t = sendmsg->transport;
866
867 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
868 ib_wc_status_msg(wc->status), wc->status,
869 wc->opcode);
870
871 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
872 pr_err("Send error. status='%s (%d)', opcode=%d\n",
873 ib_wc_status_msg(wc->status), wc->status,
874 wc->opcode);
875 smb_direct_disconnect_rdma_connection(t);
876 }
877
878 if (atomic_dec_and_test(&t->send_pending))
879 wake_up(&t->wait_send_pending);
880
881 /* iterate and free the list of messages in reverse. the list's head
882 * is invalid.
883 */
884 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
885 prev != end; pos = prev, prev = prev->prev) {
886 sibling = container_of(pos, struct smb_direct_sendmsg, list);
887 smb_direct_free_sendmsg(t, sibling);
888 }
889
890 sibling = container_of(pos, struct smb_direct_sendmsg, list);
891 smb_direct_free_sendmsg(t, sibling);
892}
893
894static int manage_credits_prior_sending(struct smb_direct_transport *t)
895{
896 int new_credits;
897
898 spin_lock(&t->lock_new_recv_credits);
899 new_credits = t->new_recv_credits;
900 t->new_recv_credits = 0;
901 spin_unlock(&t->lock_new_recv_credits);
902
903 return new_credits;
904}
905
906static int smb_direct_post_send(struct smb_direct_transport *t,
907 struct ib_send_wr *wr)
908{
909 int ret;
910
911 atomic_inc(&t->send_pending);
912 ret = ib_post_send(t->qp, wr, NULL);
913 if (ret) {
914 pr_err("failed to post send: %d\n", ret);
915 if (atomic_dec_and_test(&t->send_pending))
916 wake_up(&t->wait_send_pending);
917 smb_direct_disconnect_rdma_connection(t);
918 }
919 return ret;
920}
921
922static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
923 struct smb_direct_send_ctx *send_ctx,
924 bool need_invalidate_rkey,
925 unsigned int remote_key)
926{
927 INIT_LIST_HEAD(&send_ctx->msg_list);
928 send_ctx->wr_cnt = 0;
929 send_ctx->need_invalidate_rkey = need_invalidate_rkey;
930 send_ctx->remote_key = remote_key;
931}
932
933static int smb_direct_flush_send_list(struct smb_direct_transport *t,
934 struct smb_direct_send_ctx *send_ctx,
935 bool is_last)
936{
937 struct smb_direct_sendmsg *first, *last;
938 int ret;
939
940 if (list_empty(&send_ctx->msg_list))
941 return 0;
942
943 first = list_first_entry(&send_ctx->msg_list,
944 struct smb_direct_sendmsg,
945 list);
946 last = list_last_entry(&send_ctx->msg_list,
947 struct smb_direct_sendmsg,
948 list);
949
950 last->wr.send_flags = IB_SEND_SIGNALED;
951 last->wr.wr_cqe = &last->cqe;
952 if (is_last && send_ctx->need_invalidate_rkey) {
953 last->wr.opcode = IB_WR_SEND_WITH_INV;
954 last->wr.ex.invalidate_rkey = send_ctx->remote_key;
955 }
956
957 ret = smb_direct_post_send(t, &first->wr);
958 if (!ret) {
959 smb_direct_send_ctx_init(t, send_ctx,
960 send_ctx->need_invalidate_rkey,
961 send_ctx->remote_key);
962 } else {
963 atomic_add(send_ctx->wr_cnt, &t->send_credits);
964 wake_up(&t->wait_send_credits);
965 list_for_each_entry_safe(first, last, &send_ctx->msg_list,
966 list) {
967 smb_direct_free_sendmsg(t, first);
968 }
969 }
970 return ret;
971}
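/* Batching note: sendmsgs queued on a send_ctx are chained through
 * wr.next in post_sendmsg(), and only the final WR is IB_SEND_SIGNALED,
 * so one completion releases the whole chain (see the reverse walk in
 * send_done()).
 */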
972
973static int wait_for_credits(struct smb_direct_transport *t,
974 wait_queue_head_t *waitq, atomic_t *total_credits,
975 int needed)
976{
977 int ret;
978
979 do {
980 if (atomic_sub_return(needed, total_credits) >= 0)
981 return 0;
982
983 atomic_add(needed, total_credits);
984 ret = wait_event_interruptible(*waitq,
985 atomic_read(total_credits) >= needed ||
986 t->status != SMB_DIRECT_CS_CONNECTED);
987
988 if (t->status != SMB_DIRECT_CS_CONNECTED)
989 return -ENOTCONN;
990 else if (ret < 0)
991 return ret;
992 } while (true);
993}
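/* Credits are claimed optimistically: atomic_sub_return() reserves them
 * up front; if the pool went negative the claim is rolled back with
 * atomic_add() and the caller sleeps until enough credits return or the
 * connection leaves SMB_DIRECT_CS_CONNECTED.
 */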
994
995static int wait_for_send_credits(struct smb_direct_transport *t,
070fb21e 996 struct smb_direct_send_ctx *send_ctx)
0626e664
NJ
997{
998 int ret;
999
1000 if (send_ctx &&
1001 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
1002 ret = smb_direct_flush_send_list(t, send_ctx, false);
1003 if (ret)
1004 return ret;
1005 }
1006
1007 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits, 1);
1008}
1009
1010static int wait_for_rw_credits(struct smb_direct_transport *t, int credits)
1011{
1012 return wait_for_credits(t, &t->wait_rw_credits, &t->rw_credits, credits);
1013}
1014
1015static int calc_rw_credits(struct smb_direct_transport *t,
1016 char *buf, unsigned int len)
1017{
1018 return DIV_ROUND_UP(get_buf_page_count(buf, len),
1019 t->pages_per_rw_credit);
1020}
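/* Example: with 4 KiB pages and pages_per_rw_credit == 256, a page-aligned
 * 1 MiB payload covers 256 pages and costs one R/W credit; an unaligned
 * buffer of the same size can straddle 257 pages and costs two.
 */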
1021
1022static int smb_direct_create_header(struct smb_direct_transport *t,
1023 int size, int remaining_data_length,
1024 struct smb_direct_sendmsg **sendmsg_out)
1025{
1026 struct smb_direct_sendmsg *sendmsg;
1027 struct smb_direct_data_transfer *packet;
1028 int header_length;
1029 int ret;
1030
1031 sendmsg = smb_direct_alloc_sendmsg(t);
1032 if (IS_ERR(sendmsg))
1033 return PTR_ERR(sendmsg);
1034
1035 /* Fill in the packet header */
1036 packet = (struct smb_direct_data_transfer *)sendmsg->packet;
1037 packet->credits_requested = cpu_to_le16(t->send_credit_target);
1038 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1039
1040 packet->flags = 0;
1041 packet->reserved = 0;
1042 if (!size)
1043 packet->data_offset = 0;
1044 else
1045 packet->data_offset = cpu_to_le32(24);
1046 packet->data_length = cpu_to_le32(size);
1047 packet->remaining_data_length = cpu_to_le32(remaining_data_length);
1048 packet->padding = 0;
1049
1050 ksmbd_debug(RDMA,
1051 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
1052 le16_to_cpu(packet->credits_requested),
1053 le16_to_cpu(packet->credits_granted),
1054 le32_to_cpu(packet->data_offset),
1055 le32_to_cpu(packet->data_length),
1056 le32_to_cpu(packet->remaining_data_length));
1057
1058 /* Map the packet to DMA */
1059 header_length = sizeof(struct smb_direct_data_transfer);
1060 /* If this is a packet without payload, don't send padding */
1061 if (!size)
1062 header_length =
1063 offsetof(struct smb_direct_data_transfer, padding);
1064
1065 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1066 (void *)packet,
1067 header_length,
1068 DMA_TO_DEVICE);
1069 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1070 if (ret) {
1071 smb_direct_free_sendmsg(t, sendmsg);
1072 return ret;
1073 }
1074
1075 sendmsg->num_sge = 1;
1076 sendmsg->sge[0].length = header_length;
1077 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1078
1079 *sendmsg_out = sendmsg;
1080 return 0;
1081}
1082
1083static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
1084{
1085 bool high = is_vmalloc_addr(buf);
1086 struct page *page;
1087 int offset, len;
1088 int i = 0;
1089
1090 if (size <= 0 || nentries < get_buf_page_count(buf, size))
1091 return -EINVAL;
1092
1093 offset = offset_in_page(buf);
1094 buf -= offset;
1095 while (size > 0) {
1096 len = min_t(int, PAGE_SIZE - offset, size);
1097 if (high)
1098 page = vmalloc_to_page(buf);
1099 else
1100 page = kmap_to_page(buf);
1101
1102 if (!sg_list)
1103 return -EINVAL;
1104 sg_set_page(sg_list, page, len, offset);
1105 sg_list = sg_next(sg_list);
1106
1107 buf += PAGE_SIZE;
1108 size -= len;
1109 offset = 0;
1110 i++;
1111 }
1112 return i;
1113}
1114
1115static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
1116 struct scatterlist *sg_list, int nentries,
1117 enum dma_data_direction dir)
1118{
1119 int npages;
1120
1121 npages = get_sg_list(buf, size, sg_list, nentries);
1122 if (npages < 0)
1123 return -EINVAL;
1124 return ib_dma_map_sg(device, sg_list, npages, dir);
1125}
1126
1127static int post_sendmsg(struct smb_direct_transport *t,
1128 struct smb_direct_send_ctx *send_ctx,
1129 struct smb_direct_sendmsg *msg)
1130{
1131 int i;
1132
1133 for (i = 0; i < msg->num_sge; i++)
1134 ib_dma_sync_single_for_device(t->cm_id->device,
1135 msg->sge[i].addr, msg->sge[i].length,
1136 DMA_TO_DEVICE);
1137
1138 msg->cqe.done = send_done;
1139 msg->wr.opcode = IB_WR_SEND;
1140 msg->wr.sg_list = &msg->sge[0];
1141 msg->wr.num_sge = msg->num_sge;
1142 msg->wr.next = NULL;
1143
1144 if (send_ctx) {
1145 msg->wr.wr_cqe = NULL;
1146 msg->wr.send_flags = 0;
1147 if (!list_empty(&send_ctx->msg_list)) {
1148 struct smb_direct_sendmsg *last;
1149
1150 last = list_last_entry(&send_ctx->msg_list,
1151 struct smb_direct_sendmsg,
1152 list);
1153 last->wr.next = &msg->wr;
1154 }
1155 list_add_tail(&msg->list, &send_ctx->msg_list);
1156 send_ctx->wr_cnt++;
1157 return 0;
1158 }
1159
1160 msg->wr.wr_cqe = &msg->cqe;
1161 msg->wr.send_flags = IB_SEND_SIGNALED;
1162 return smb_direct_post_send(t, &msg->wr);
1163}
1164
1165static int smb_direct_post_send_data(struct smb_direct_transport *t,
1166 struct smb_direct_send_ctx *send_ctx,
1167 struct kvec *iov, int niov,
1168 int remaining_data_length)
1169{
1170 int i, j, ret;
1171 struct smb_direct_sendmsg *msg;
1172 int data_length;
1173 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
1174
1175 ret = wait_for_send_credits(t, send_ctx);
1176 if (ret)
1177 return ret;
1178
1179 data_length = 0;
1180 for (i = 0; i < niov; i++)
1181 data_length += iov[i].iov_len;
1182
1183 ret = smb_direct_create_header(t, data_length, remaining_data_length,
1184 &msg);
1185 if (ret) {
1186 atomic_inc(&t->send_credits);
1187 return ret;
1188 }
1189
1190 for (i = 0; i < niov; i++) {
1191 struct ib_sge *sge;
1192 int sg_cnt;
1193
1194 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
1195 sg_cnt = get_mapped_sg_list(t->cm_id->device,
1196 iov[i].iov_base, iov[i].iov_len,
1197 sg, SMB_DIRECT_MAX_SEND_SGES - 1,
1198 DMA_TO_DEVICE);
1199 if (sg_cnt <= 0) {
1200 pr_err("failed to map buffer\n");
1201 ret = -ENOMEM;
1202 goto err;
1203 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
1204 pr_err("buffer not fitted into sges\n");
1205 ret = -E2BIG;
1206 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
1207 DMA_TO_DEVICE);
1208 goto err;
1209 }
1210
1211 for (j = 0; j < sg_cnt; j++) {
1212 sge = &msg->sge[msg->num_sge];
1213 sge->addr = sg_dma_address(&sg[j]);
1214 sge->length = sg_dma_len(&sg[j]);
1215 sge->lkey = t->pd->local_dma_lkey;
1216 msg->num_sge++;
1217 }
1218 }
1219
1220 ret = post_sendmsg(t, send_ctx, msg);
1221 if (ret)
1222 goto err;
1223 return 0;
1224err:
1225 smb_direct_free_sendmsg(t, msg);
1226 atomic_inc(&t->send_credits);
1227 return ret;
1228}
1229
1230static int smb_direct_writev(struct ksmbd_transport *t,
070fb21e
NJ
1231 struct kvec *iov, int niovs, int buflen,
1232 bool need_invalidate, unsigned int remote_key)
1233 {
1234 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1235 int remaining_data_length;
1236 int start, i, j;
1237 int max_iov_size = st->max_send_size -
1238 sizeof(struct smb_direct_data_transfer);
1239 int ret;
1240 struct kvec vec;
1241 struct smb_direct_send_ctx send_ctx;
1242
1243 if (st->status != SMB_DIRECT_CS_CONNECTED)
1244 return -ENOTCONN;
1245
1246 //FIXME: skip RFC1002 header..
1247 buflen -= 4;
1248 iov[0].iov_base += 4;
1249 iov[0].iov_len -= 4;
1250
1251 remaining_data_length = buflen;
1252 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
1253
1254 smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
1255 start = i = 0;
1256 buflen = 0;
1257 while (true) {
1258 buflen += iov[i].iov_len;
1259 if (buflen > max_iov_size) {
1260 if (i > start) {
1261 remaining_data_length -=
1262 (buflen - iov[i].iov_len);
1263 ret = smb_direct_post_send_data(st, &send_ctx,
1264 &iov[start], i - start,
1265 remaining_data_length);
1266 if (ret)
1267 goto done;
1268 } else {
1269 /* iov[start] is too big, break it */
1270 int nvec = (buflen + max_iov_size - 1) /
1271 max_iov_size;
1272
1273 for (j = 0; j < nvec; j++) {
1274 vec.iov_base =
1275 (char *)iov[start].iov_base +
1276 j * max_iov_size;
1277 vec.iov_len =
1278 min_t(int, max_iov_size,
1279 buflen - max_iov_size * j);
1280 remaining_data_length -= vec.iov_len;
1281 ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
1282 remaining_data_length);
1283 if (ret)
1284 goto done;
1285 }
1286 i++;
1287 if (i == niovs)
1288 break;
1289 }
1290 start = i;
1291 buflen = 0;
1292 } else {
1293 i++;
1294 if (i == niovs) {
1295 /* send out all remaining vecs */
1296 remaining_data_length -= buflen;
1297 ret = smb_direct_post_send_data(st, &send_ctx,
1298 &iov[start], i - start,
1299 remaining_data_length);
1300 if (ret)
1301 goto done;
1302 break;
1303 }
1304 }
1305 }
1306
1307done:
1308 ret = smb_direct_flush_send_list(st, &send_ctx, true);
1309
1310 /*
1311 * As an optimization, we don't wait for individual I/O to finish
1312 * before sending the next one.
1313 * Send them all and wait for pending send count to get to 0
1314 * that means all the I/Os have been out and we are good to return
1315 */
1316
1317 wait_event(st->wait_send_pending,
1318 atomic_read(&st->send_pending) == 0);
1319 return ret;
1320}
1321
1322static void smb_direct_free_rdma_rw_msg(struct smb_direct_transport *t,
1323 struct smb_direct_rdma_rw_msg *msg,
1324 enum dma_data_direction dir)
1325{
1326 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
1327 msg->sgt.sgl, msg->sgt.nents, dir);
1328 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1329 kfree(msg);
1330}
1331
1332static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
1333 enum dma_data_direction dir)
1334{
1335 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
1336 struct smb_direct_rdma_rw_msg, cqe);
1337 struct smb_direct_transport *t = msg->t;
1338
1339 if (wc->status != IB_WC_SUCCESS) {
1340 msg->status = -EIO;
1341 pr_err("read/write error. opcode = %d, status = %s(%d)\n",
1342 wc->opcode, ib_wc_status_msg(wc->status), wc->status);
1343 if (wc->status != IB_WC_WR_FLUSH_ERR)
1344 smb_direct_disconnect_rdma_connection(t);
1345 }
1346
1347 complete(msg->completion);
1348}
1349
1350static void read_done(struct ib_cq *cq, struct ib_wc *wc)
1351{
1352 read_write_done(cq, wc, DMA_FROM_DEVICE);
1353}
1354
1355static void write_done(struct ib_cq *cq, struct ib_wc *wc)
1356{
1357 read_write_done(cq, wc, DMA_TO_DEVICE);
1358}
1359
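/* RDMA read/write path in three steps: (1) walk the buffer descriptors to
 * size and reserve the required R/W credits, (2) build one
 * smb_direct_rdma_rw_msg (chained scatterlist + rdma_rw_ctx) per
 * descriptor, (3) link all work requests and post them with a single
 * ib_post_send(), then wait on the last message's completion.
 */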
1360static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
1361 void *buf, int buf_len,
1362 struct smb2_buffer_desc_v1 *desc,
1363 unsigned int desc_len,
1364 bool is_read)
1365 {
1366 struct smb_direct_rdma_rw_msg *msg, *next_msg;
1367 int i, ret;
1368 DECLARE_COMPLETION_ONSTACK(completion);
1369 struct ib_send_wr *first_wr;
1370 LIST_HEAD(msg_list);
1371 char *desc_buf;
1372 int credits_needed;
1373 unsigned int desc_buf_len;
1374 size_t total_length = 0;
1375
1376 if (t->status != SMB_DIRECT_CS_CONNECTED)
1377 return -ENOTCONN;
1378
1379 /* calculate needed credits */
1380 credits_needed = 0;
1381 desc_buf = buf;
1382 for (i = 0; i < desc_len / sizeof(*desc); i++) {
1383 desc_buf_len = le32_to_cpu(desc[i].length);
1384
1385 credits_needed += calc_rw_credits(t, desc_buf, desc_buf_len);
1386 desc_buf += desc_buf_len;
1387 total_length += desc_buf_len;
1388 if (desc_buf_len == 0 || total_length > buf_len ||
1389 total_length > t->max_rdma_rw_size)
1390 return -EINVAL;
1391 }
1392
1393 ksmbd_debug(RDMA, "RDMA %s, len %#x, needed credits %#x\n",
1394 is_read ? "read" : "write", buf_len, credits_needed);
1395
1396 ret = wait_for_rw_credits(t, credits_needed);
1397 if (ret < 0)
1398 return ret;
1399
1400 /* build rdma_rw_ctx for each descriptor */
1401 desc_buf = buf;
1402 for (i = 0; i < desc_len / sizeof(*desc); i++) {
1403 msg = kzalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
1404 sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
1405 if (!msg) {
1406 ret = -ENOMEM;
1407 goto out;
1408 }
1409
1410 desc_buf_len = le32_to_cpu(desc[i].length);
1411
1412 msg->t = t;
1413 msg->cqe.done = is_read ? read_done : write_done;
1414 msg->completion = &completion;
1415
1416 msg->sgt.sgl = &msg->sg_list[0];
1417 ret = sg_alloc_table_chained(&msg->sgt,
1418 get_buf_page_count(desc_buf, desc_buf_len),
1419 msg->sg_list, SG_CHUNK_SIZE);
1420 if (ret) {
1421 kfree(msg);
1422 ret = -ENOMEM;
1423 goto out;
1424 }
1425
1426 ret = get_sg_list(desc_buf, desc_buf_len,
1427 msg->sgt.sgl, msg->sgt.orig_nents);
1428 if (ret < 0) {
1429 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1430 kfree(msg);
1431 goto out;
1432 }
1433
1434 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
1435 msg->sgt.sgl,
1436 get_buf_page_count(desc_buf, desc_buf_len),
1437 0,
1438 le64_to_cpu(desc[i].offset),
1439 le32_to_cpu(desc[i].token),
1440 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1441 if (ret < 0) {
1442 pr_err("failed to init rdma_rw_ctx: %d\n", ret);
1443 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1444 kfree(msg);
1445 goto out;
1446 }
1447
1448 list_add_tail(&msg->list, &msg_list);
1449 desc_buf += desc_buf_len;
1450 }
1451
1452 /* concatenate work requests of rdma_rw_ctxs */
1453 first_wr = NULL;
1454 list_for_each_entry_reverse(msg, &msg_list, list) {
1455 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
1456 &msg->cqe, first_wr);
1457 }
1458
1459 ret = ib_post_send(t->qp, first_wr, NULL);
1460 if (ret) {
1461 pr_err("failed to post send wr for RDMA R/W: %d\n", ret);
1462 goto out;
1463 }
1464
1465 msg = list_last_entry(&msg_list, struct smb_direct_rdma_rw_msg, list);
1466 wait_for_completion(&completion);
1467 ret = msg->status;
1468out:
1469 list_for_each_entry_safe(msg, next_msg, &msg_list, list) {
1470 list_del(&msg->list);
1471 smb_direct_free_rdma_rw_msg(t, msg,
1472 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
1473 }
1474 atomic_add(credits_needed, &t->rw_credits);
1475 wake_up(&t->wait_rw_credits);
1476 return ret;
1477}
1478
1479static int smb_direct_rdma_write(struct ksmbd_transport *t,
1480 void *buf, unsigned int buflen,
1481 struct smb2_buffer_desc_v1 *desc,
1482 unsigned int desc_len)
1483 {
1484 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1485 desc, desc_len, false);
1486}
1487
1488static int smb_direct_rdma_read(struct ksmbd_transport *t,
1489 void *buf, unsigned int buflen,
1490 struct smb2_buffer_desc_v1 *desc,
1491 unsigned int desc_len)
1492 {
1493 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
1494 desc, desc_len, true);
1495}
1496
1497static void smb_direct_disconnect(struct ksmbd_transport *t)
1498{
1499 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1500
1501 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
1502
1503 smb_direct_disconnect_rdma_work(&st->disconnect_work);
1504 wait_event_interruptible(st->wait_status,
1505 st->status == SMB_DIRECT_CS_DISCONNECTED);
1506 free_transport(st);
1507}
1508
1509static void smb_direct_shutdown(struct ksmbd_transport *t)
1510{
1511 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1512
1513 ksmbd_debug(RDMA, "smb-direct shutdown cm_id=%p\n", st->cm_id);
1514
1515 smb_direct_disconnect_rdma_work(&st->disconnect_work);
1516}
1517
1518static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
1519 struct rdma_cm_event *event)
1520{
1521 struct smb_direct_transport *t = cm_id->context;
1522
1523 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
1524 cm_id, rdma_event_msg(event->event), event->event);
1525
1526 switch (event->event) {
1527 case RDMA_CM_EVENT_ESTABLISHED: {
1528 t->status = SMB_DIRECT_CS_CONNECTED;
1529 wake_up_interruptible(&t->wait_status);
1530 break;
1531 }
1532 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1533 case RDMA_CM_EVENT_DISCONNECTED: {
1534 t->status = SMB_DIRECT_CS_DISCONNECTED;
1535 wake_up_interruptible(&t->wait_status);
1536 wake_up_interruptible(&t->wait_reassembly_queue);
1537 wake_up(&t->wait_send_credits);
1538 break;
1539 }
1540 case RDMA_CM_EVENT_CONNECT_ERROR: {
1541 t->status = SMB_DIRECT_CS_DISCONNECTED;
1542 wake_up_interruptible(&t->wait_status);
1543 break;
1544 }
1545 default:
1546 pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
1547 cm_id, rdma_event_msg(event->event),
1548 event->event);
1549 break;
1550 }
1551 return 0;
1552}
1553
1554static void smb_direct_qpair_handler(struct ib_event *event, void *context)
1555{
1556 struct smb_direct_transport *t = context;
1557
1558 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
1559 t->cm_id, ib_event_msg(event->event), event->event);
1560
1561 switch (event->event) {
1562 case IB_EVENT_CQ_ERR:
1563 case IB_EVENT_QP_FATAL:
1564 smb_direct_disconnect_rdma_connection(t);
1565 break;
1566 default:
1567 break;
1568 }
1569}
1570
1571static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
1572 int failed)
1573{
1574 struct smb_direct_sendmsg *sendmsg;
1575 struct smb_direct_negotiate_resp *resp;
1576 int ret;
1577
1578 sendmsg = smb_direct_alloc_sendmsg(t);
1579 if (IS_ERR(sendmsg))
1580 return -ENOMEM;
1581
1582 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
1583 if (failed) {
1584 memset(resp, 0, sizeof(*resp));
1585 resp->min_version = cpu_to_le16(0x0100);
1586 resp->max_version = cpu_to_le16(0x0100);
1587 resp->status = STATUS_NOT_SUPPORTED;
1588 } else {
1589 resp->status = STATUS_SUCCESS;
1590 resp->min_version = SMB_DIRECT_VERSION_LE;
1591 resp->max_version = SMB_DIRECT_VERSION_LE;
1592 resp->negotiated_version = SMB_DIRECT_VERSION_LE;
1593 resp->reserved = 0;
1594 resp->credits_requested =
1595 cpu_to_le16(t->send_credit_target);
1596 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1597 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
1598 resp->preferred_send_size = cpu_to_le32(t->max_send_size);
1599 resp->max_receive_size = cpu_to_le32(t->max_recv_size);
1600 resp->max_fragmented_size =
1601 cpu_to_le32(t->max_fragmented_recv_size);
1602 }
1603
1604 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1605 (void *)resp, sizeof(*resp),
1606 DMA_TO_DEVICE);
1607 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
0626e664
NJ
1608 if (ret) {
1609 smb_direct_free_sendmsg(t, sendmsg);
1610 return ret;
1611 }
1612
1613 sendmsg->num_sge = 1;
1614 sendmsg->sge[0].length = sizeof(*resp);
1615 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1616
1617 ret = post_sendmsg(t, NULL, sendmsg);
1618 if (ret) {
1619 smb_direct_free_sendmsg(t, sendmsg);
1620 return ret;
1621 }
1622
1623 wait_event(t->wait_send_pending,
1624 atomic_read(&t->send_pending) == 0);
1625 return 0;
1626}
1627
1628static int smb_direct_accept_client(struct smb_direct_transport *t)
1629{
1630 struct rdma_conn_param conn_param;
1631 struct ib_port_immutable port_immutable;
1632 u32 ird_ord_hdr[2];
1633 int ret;
1634
1635 memset(&conn_param, 0, sizeof(conn_param));
1636 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
1637 SMB_DIRECT_CM_INITIATOR_DEPTH);
1638 conn_param.responder_resources = 0;
1639
1640 t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
1641 t->cm_id->port_num,
1642 &port_immutable);
1643 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
1644 ird_ord_hdr[0] = conn_param.responder_resources;
1645 ird_ord_hdr[1] = 1;
1646 conn_param.private_data = ird_ord_hdr;
1647 conn_param.private_data_len = sizeof(ird_ord_hdr);
1648 } else {
1649 conn_param.private_data = NULL;
1650 conn_param.private_data_len = 0;
1651 }
1652 conn_param.retry_count = SMB_DIRECT_CM_RETRY;
1653 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
1654 conn_param.flow_control = 0;
1655
1656 ret = rdma_accept(t->cm_id, &conn_param);
1657 if (ret) {
1658 pr_err("error at rdma_accept: %d\n", ret);
1659 return ret;
1660 }
1661 return 0;
1662}
1663
1664static int smb_direct_prepare_negotiation(struct smb_direct_transport *t)
1665{
1666 int ret;
1667 struct smb_direct_recvmsg *recvmsg;
1668
1669 recvmsg = get_free_recvmsg(t);
1670 if (!recvmsg)
1671 return -ENOMEM;
1672 recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
1673
1674 ret = smb_direct_post_recv(t, recvmsg);
1675 if (ret) {
1676 pr_err("Can't post recv: %d\n", ret);
1677 goto out_err;
1678 }
1679
1680 t->negotiation_requested = false;
1681 ret = smb_direct_accept_client(t);
1682 if (ret) {
1683 pr_err("Can't accept client\n");
1684 goto out_err;
1685 }
1686
1687 smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
1688 return 0;
1689out_err:
1690 put_recvmsg(t, recvmsg);
1691 return ret;
1692}
1693
1694static unsigned int smb_direct_get_max_fr_pages(struct smb_direct_transport *t)
1695{
1696 return min_t(unsigned int,
1697 t->cm_id->device->attrs.max_fast_reg_page_list_len,
1698 256);
1699}
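/* One R/W credit corresponds to one fast-registration MR, so it can map at
 * most max_fast_reg_page_list_len pages (capped at 256 above). For example,
 * an 8 MiB max_rdma_rw_size with 4 KiB pages yields
 * DIV_ROUND_UP(8 MiB, 255 * 4 KiB) = 9 max_rw_credits in
 * smb_direct_init_params().
 */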
1700
1701static int smb_direct_init_params(struct smb_direct_transport *t,
1702 struct ib_qp_cap *cap)
1703{
1704 struct ib_device *device = t->cm_id->device;
1705 int max_send_sges, max_rw_wrs, max_send_wrs;
1706 unsigned int max_sge_per_wr, wrs_per_credit;
1707
1708 /* need 2 more sge. because a SMB_DIRECT header will be mapped,
1709 * and maybe a send buffer could be not page aligned.
1710 */
1711 t->max_send_size = smb_direct_max_send_size;
1712 max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 2;
1713 if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
1714 pr_err("max_send_size %d is too large\n", t->max_send_size);
1715 return -EINVAL;
1716 }
1717
1718 /* Calculate the number of work requests for RDMA R/W.
1719 * The maximum number of pages which can be registered
1720 * with one Memory region can be transferred with one
1721 * R/W credit. And at least 4 work requests for each credit
1722 * are needed for MR registration, RDMA R/W, local & remote
1723 * MR invalidation.
1724 */
1725 t->max_rdma_rw_size = smb_direct_max_read_write_size;
1726 t->pages_per_rw_credit = smb_direct_get_max_fr_pages(t);
1727 t->max_rw_credits = DIV_ROUND_UP(t->max_rdma_rw_size,
1728 (t->pages_per_rw_credit - 1) *
1729 PAGE_SIZE);
1730
1731 max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
1732 device->attrs.max_sge_rd);
1733 wrs_per_credit = max_t(unsigned int, 4,
1734 DIV_ROUND_UP(t->pages_per_rw_credit,
1735 max_sge_per_wr) + 1);
1736 max_rw_wrs = t->max_rw_credits * wrs_per_credit;
1737
1738 max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
1739 if (max_send_wrs > device->attrs.max_cqe ||
1740 max_send_wrs > device->attrs.max_qp_wr) {
1741 pr_err("consider lowering send_credit_target = %d\n",
1742 smb_direct_send_credit_target);
1743 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1744 device->attrs.max_cqe, device->attrs.max_qp_wr);
1745 return -EINVAL;
1746 }
1747
1748 if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
1749 smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
1750 pr_err("consider lowering receive_credit_max = %d\n",
1751 smb_direct_receive_credit_max);
1752 pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
1753 device->attrs.max_cqe, device->attrs.max_qp_wr);
1754 return -EINVAL;
1755 }
1756
1757 if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
1758 pr_err("warning: device max_send_sge = %d too small\n",
1759 device->attrs.max_send_sge);
1760 return -EINVAL;
1761 }
1762 if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
1763 pr_err("warning: device max_recv_sge = %d too small\n",
1764 device->attrs.max_recv_sge);
1765 return -EINVAL;
1766 }
1767
1768 t->recv_credits = 0;
1769 t->count_avail_recvmsg = 0;
1770
1771 t->recv_credit_max = smb_direct_receive_credit_max;
1772 t->recv_credit_target = 10;
1773 t->new_recv_credits = 0;
1774
1775 t->send_credit_target = smb_direct_send_credit_target;
1776 atomic_set(&t->send_credits, 0);
ddbdc861 1777 atomic_set(&t->rw_credits, t->max_rw_credits);
0626e664
NJ
1778
1779 t->max_send_size = smb_direct_max_send_size;
1780 t->max_recv_size = smb_direct_max_receive_size;
1781 t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
1782
1783 cap->max_send_wr = max_send_wrs;
1784 cap->max_recv_wr = t->recv_credit_max;
ddbdc861 1785 cap->max_send_sge = max_sge_per_wr;
0626e664
NJ
1786 cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
1787 cap->max_inline_data = 0;
ddbdc861 1788 cap->max_rdma_ctxs = t->max_rw_credits;
0626e664
NJ
1789 return 0;
1790}
1791
1792static void smb_direct_destroy_pools(struct smb_direct_transport *t)
1793{
1794 struct smb_direct_recvmsg *recvmsg;
1795
1796 while ((recvmsg = get_free_recvmsg(t)))
1797 mempool_free(recvmsg, t->recvmsg_mempool);
1798 while ((recvmsg = get_empty_recvmsg(t)))
1799 mempool_free(recvmsg, t->recvmsg_mempool);
1800
1801 mempool_destroy(t->recvmsg_mempool);
1802 t->recvmsg_mempool = NULL;
1803
1804 kmem_cache_destroy(t->recvmsg_cache);
1805 t->recvmsg_cache = NULL;
1806
1807 mempool_destroy(t->sendmsg_mempool);
1808 t->sendmsg_mempool = NULL;
1809
1810 kmem_cache_destroy(t->sendmsg_cache);
1811 t->sendmsg_cache = NULL;
1812}
1813
1814static int smb_direct_create_pools(struct smb_direct_transport *t)
1815{
1816 char name[80];
1817 int i;
1818 struct smb_direct_recvmsg *recvmsg;
1819
1820 snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
1821 t->sendmsg_cache = kmem_cache_create(name,
070fb21e
NJ
1822 sizeof(struct smb_direct_sendmsg) +
1823 sizeof(struct smb_direct_negotiate_resp),
1824 0, SLAB_HWCACHE_ALIGN, NULL);
0626e664
NJ
1825 if (!t->sendmsg_cache)
1826 return -ENOMEM;
1827
1828 t->sendmsg_mempool = mempool_create(t->send_credit_target,
070fb21e
NJ
1829 mempool_alloc_slab, mempool_free_slab,
1830 t->sendmsg_cache);
0626e664
NJ
1831 if (!t->sendmsg_mempool)
1832 goto err;
1833
1834 snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
1835 t->recvmsg_cache = kmem_cache_create(name,
070fb21e
NJ
1836 sizeof(struct smb_direct_recvmsg) +
1837 t->max_recv_size,
1838 0, SLAB_HWCACHE_ALIGN, NULL);
0626e664
NJ
1839 if (!t->recvmsg_cache)
1840 goto err;
1841
1842 t->recvmsg_mempool =
1843 mempool_create(t->recv_credit_max, mempool_alloc_slab,
070fb21e 1844 mempool_free_slab, t->recvmsg_cache);
0626e664
NJ
1845 if (!t->recvmsg_mempool)
1846 goto err;
1847
1848 INIT_LIST_HEAD(&t->recvmsg_queue);
1849
1850 for (i = 0; i < t->recv_credit_max; i++) {
1851 recvmsg = mempool_alloc(t->recvmsg_mempool, GFP_KERNEL);
1852 if (!recvmsg)
1853 goto err;
1854 recvmsg->transport = t;
1855 list_add(&recvmsg->list, &t->recvmsg_queue);
1856 }
1857 t->count_avail_recvmsg = t->recv_credit_max;
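	/*
	 * The mempools above reserve at least one sendmsg per send credit
	 * and one recvmsg per receive credit, so allocations needed to
	 * honour already-granted credits should not fail under memory
	 * pressure.
	 */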
1858
1859 return 0;
1860err:
1861 smb_direct_destroy_pools(t);
1862 return -ENOMEM;
1863}
1864
1865static int smb_direct_create_qpair(struct smb_direct_transport *t,
070fb21e 1866 struct ib_qp_cap *cap)
0626e664
NJ
1867{
1868 int ret;
1869 struct ib_qp_init_attr qp_attr;
c9f18927 1870 int pages_per_rw;
0626e664
NJ
1871
1872 t->pd = ib_alloc_pd(t->cm_id->device, 0);
1873 if (IS_ERR(t->pd)) {
bde1694a 1874 pr_err("Can't create RDMA PD\n");
0626e664
NJ
1875 ret = PTR_ERR(t->pd);
1876 t->pd = NULL;
1877 return ret;
1878 }
1879
1880 t->send_cq = ib_alloc_cq(t->cm_id->device, t,
ddbdc861
HL
1881 smb_direct_send_credit_target + cap->max_rdma_ctxs,
1882 0, IB_POLL_WORKQUEUE);
0626e664 1883 if (IS_ERR(t->send_cq)) {
bde1694a 1884 pr_err("Can't create RDMA send CQ\n");
0626e664
NJ
1885 ret = PTR_ERR(t->send_cq);
1886 t->send_cq = NULL;
1887 goto err;
1888 }
1889
1890 t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
ddbdc861 1891 t->recv_credit_max, 0, IB_POLL_WORKQUEUE);
0626e664 1892 if (IS_ERR(t->recv_cq)) {
bde1694a 1893 pr_err("Can't create RDMA recv CQ\n");
0626e664
NJ
1894 ret = PTR_ERR(t->recv_cq);
1895 t->recv_cq = NULL;
1896 goto err;
1897 }
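	/*
	 * CQ sizing above: the send CQ is sized for one completion per send
	 * credit plus one per outstanding RDMA R/W context, while the recv
	 * CQ only needs one entry per posted receive (recv_credit_max).
	 */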
1898
1899 memset(&qp_attr, 0, sizeof(qp_attr));
1900 qp_attr.event_handler = smb_direct_qpair_handler;
1901 qp_attr.qp_context = t;
1902 qp_attr.cap = *cap;
1903 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
1904 qp_attr.qp_type = IB_QPT_RC;
1905 qp_attr.send_cq = t->send_cq;
1906 qp_attr.recv_cq = t->recv_cq;
1907 qp_attr.port_num = ~0;
1908
1909 ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
1910 if (ret) {
bde1694a 1911 pr_err("Can't create RDMA QP: %d\n", ret);
0626e664
NJ
1912 goto err;
1913 }
1914
1915 t->qp = t->cm_id->qp;
1916 t->cm_id->event_handler = smb_direct_cm_handler;
1917
c9f18927
HL
1918 pages_per_rw = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
1919 if (pages_per_rw > t->cm_id->device->attrs.max_sgl_rd) {
ddbdc861
HL
1920 ret = ib_mr_pool_init(t->qp, &t->qp->rdma_mrs,
1921 t->max_rw_credits, IB_MR_TYPE_MEM_REG,
1922 t->pages_per_rw_credit, 0);
c9f18927
HL
1923 if (ret) {
1924 pr_err("failed to init mr pool count %d pages %d\n",
ddbdc861 1925 t->max_rw_credits, t->pages_per_rw_credit);
c9f18927
HL
1926 goto err;
1927 }
1928 }
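	/*
	 * The MR pool above is only needed when a single R/W request can
	 * cover more pages than the device handles with a plain SG read
	 * (attrs.max_sgl_rd); in that case one fast-registration MR of
	 * pages_per_rw_credit pages is reserved per R/W credit.
	 */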
1929
0626e664
NJ
1930 return 0;
1931err:
1932 if (t->qp) {
1933 ib_destroy_qp(t->qp);
1934 t->qp = NULL;
1935 }
1936 if (t->recv_cq) {
1937 ib_destroy_cq(t->recv_cq);
1938 t->recv_cq = NULL;
1939 }
1940 if (t->send_cq) {
1941 ib_destroy_cq(t->send_cq);
1942 t->send_cq = NULL;
1943 }
1944 if (t->pd) {
1945 ib_dealloc_pd(t->pd);
1946 t->pd = NULL;
1947 }
1948 return ret;
1949}
1950
1951static int smb_direct_prepare(struct ksmbd_transport *t)
1952{
02d4b4aa 1953 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
99b7650a
HL
1954 struct smb_direct_recvmsg *recvmsg;
1955 struct smb_direct_negotiate_req *req;
1956 int ret;
1957
1958 ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
1959 ret = wait_event_interruptible_timeout(st->wait_status,
1960 st->negotiation_requested ||
1961 st->status == SMB_DIRECT_CS_DISCONNECTED,
1962 SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
1963 if (ret <= 0 || st->status == SMB_DIRECT_CS_DISCONNECTED)
1964 return ret < 0 ? ret : -ETIMEDOUT;
1965
1966 recvmsg = get_first_reassembly(st);
1967 if (!recvmsg)
1968 return -ECONNABORTED;
1969
1970 ret = smb_direct_check_recvmsg(recvmsg);
1971 if (ret == -ECONNABORTED)
1972 goto out;
1973
1974 req = (struct smb_direct_negotiate_req *)recvmsg->packet;
1975 st->max_recv_size = min_t(int, st->max_recv_size,
1976 le32_to_cpu(req->preferred_send_size));
1977 st->max_send_size = min_t(int, st->max_send_size,
1978 le32_to_cpu(req->max_receive_size));
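	/*
	 * Note the swap of perspective: the client's preferred send size
	 * bounds our receive size, and the client's maximum receive size
	 * bounds our send size.
	 */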
1979 st->max_fragmented_send_size =
4d02c4fd
HL
1980 le32_to_cpu(req->max_fragmented_size);
1981 st->max_fragmented_recv_size =
1982 (st->recv_credit_max * st->max_recv_size) / 2;
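	/*
	 * With the defaults (255 receive credits, 8192-byte receives, and a
	 * client preferring at least that send size) this works out to
	 * 255 * 8192 / 2 = 1,044,480 bytes of fragmented receive budget.
	 */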
99b7650a
HL
1983
1984 ret = smb_direct_send_negotiate_response(st, ret);
1985out:
1986 spin_lock_irq(&st->reassembly_queue_lock);
1987 st->reassembly_queue_length--;
1988 list_del(&recvmsg->list);
1989 spin_unlock_irq(&st->reassembly_queue_lock);
1990 put_recvmsg(st, recvmsg);
1991
1992 return ret;
1993}
1994
1995static int smb_direct_connect(struct smb_direct_transport *st)
1996{
0626e664
NJ
1997 int ret;
1998 struct ib_qp_cap qp_cap;
1999
2000 ret = smb_direct_init_params(st, &qp_cap);
2001 if (ret) {
bde1694a 2002 pr_err("Can't configure RDMA parameters\n");
0626e664
NJ
2003 return ret;
2004 }
2005
2006 ret = smb_direct_create_pools(st);
2007 if (ret) {
bde1694a 2008 pr_err("Can't init RDMA pool: %d\n", ret);
0626e664
NJ
2009 return ret;
2010 }
2011
2012 ret = smb_direct_create_qpair(st, &qp_cap);
2013 if (ret) {
bde1694a 2014 pr_err("Can't accept RDMA client: %d\n", ret);
0626e664
NJ
2015 return ret;
2016 }
2017
99b7650a 2018 ret = smb_direct_prepare_negotiation(st);
0626e664 2019 if (ret) {
bde1694a 2020 pr_err("Can't negotiate: %d\n", ret);
0626e664
NJ
2021 return ret;
2022 }
0626e664
NJ
2023 return 0;
2024}
2025
2026static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
2027{
2028 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
2029 return false;
2030 if (attrs->max_fast_reg_page_list_len == 0)
2031 return false;
2032 return true;
2033}
2034
2035static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
2036{
2037 struct smb_direct_transport *t;
99b7650a 2038 int ret;
0626e664
NJ
2039
2040 if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
2041 ksmbd_debug(RDMA,
070fb21e
NJ
2042 "Fast Registration Work Requests is not supported. device capabilities=%llx\n",
2043 new_cm_id->device->attrs.device_cap_flags);
0626e664
NJ
2044 return -EPROTONOSUPPORT;
2045 }
2046
2047 t = alloc_transport(new_cm_id);
2048 if (!t)
2049 return -ENOMEM;
2050
99b7650a
HL
2051 ret = smb_direct_connect(t);
2052 if (ret)
2053 goto out_err;
2054
0626e664 2055 KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
070fb21e 2056 KSMBD_TRANS(t)->conn, "ksmbd:r%u",
cb097b3d 2057 smb_direct_port);
0626e664 2058 if (IS_ERR(KSMBD_TRANS(t)->handler)) {
99b7650a 2059 ret = PTR_ERR(KSMBD_TRANS(t)->handler);
bde1694a 2060 pr_err("Can't start thread\n");
99b7650a 2061 goto out_err;
0626e664
NJ
2062 }
2063
2064 return 0;
99b7650a
HL
2065out_err:
2066 free_transport(t);
2067 return ret;
0626e664
NJ
2068}
2069
2070static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
070fb21e 2071 struct rdma_cm_event *event)
0626e664
NJ
2072{
2073 switch (event->event) {
2074 case RDMA_CM_EVENT_CONNECT_REQUEST: {
2075 int ret = smb_direct_handle_connect_request(cm_id);
2076
2077 if (ret) {
bde1694a 2078 pr_err("Can't create transport: %d\n", ret);
0626e664
NJ
2079 return ret;
2080 }
2081
2082 ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
070fb21e 2083 cm_id);
0626e664
NJ
2084 break;
2085 }
2086 default:
bde1694a
NJ
2087 pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
2088 cm_id, rdma_event_msg(event->event), event->event);
0626e664
NJ
2089 break;
2090 }
2091 return 0;
2092}
2093
2094static int smb_direct_listen(int port)
2095{
2096 int ret;
2097 struct rdma_cm_id *cm_id;
2098 struct sockaddr_in sin = {
2099 .sin_family = AF_INET,
2100 .sin_addr.s_addr = htonl(INADDR_ANY),
2101 .sin_port = htons(port),
2102 };
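	/*
	 * The listener binds to INADDR_ANY on the given port;
	 * ksmbd_rdma_init() passes smb_direct_port (445 by default, or 5445
	 * when an iWARP-only device was detected at ib_client add time).
	 */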
2103
2104 cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
070fb21e 2105 &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
0626e664 2106 if (IS_ERR(cm_id)) {
bde1694a 2107 pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
0626e664
NJ
2108 return PTR_ERR(cm_id);
2109 }
2110
2111 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
2112 if (ret) {
bde1694a 2113 pr_err("Can't bind: %d\n", ret);
0626e664
NJ
2114 goto err;
2115 }
2116
2117 smb_direct_listener.cm_id = cm_id;
2118
2119 ret = rdma_listen(cm_id, 10);
2120 if (ret) {
bde1694a 2121 pr_err("Can't listen: %d\n", ret);
0626e664
NJ
2122 goto err;
2123 }
2124 return 0;
2125err:
2126 smb_direct_listener.cm_id = NULL;
2127 rdma_destroy_id(cm_id);
2128 return ret;
2129}
2130
31928a00
HL
2131static int smb_direct_ib_client_add(struct ib_device *ib_dev)
2132{
2133 struct smb_direct_device *smb_dev;
2134
cb097b3d
NJ
2135 /* Use port 5445 if the device type is iWARP (not InfiniBand) */
2136 if (ib_dev->node_type != RDMA_NODE_IB_CA)
2137 smb_direct_port = SMB_DIRECT_PORT_IWARP;
2138
31928a00
HL
2139 if (!ib_dev->ops.get_netdev ||
2140 !rdma_frwr_is_supported(&ib_dev->attrs))
2141 return 0;
2142
2143 smb_dev = kzalloc(sizeof(*smb_dev), GFP_KERNEL);
2144 if (!smb_dev)
2145 return -ENOMEM;
2146 smb_dev->ib_dev = ib_dev;
2147
2148 write_lock(&smb_direct_device_lock);
2149 list_add(&smb_dev->list, &smb_direct_device_list);
2150 write_unlock(&smb_direct_device_lock);
2151
2152 ksmbd_debug(RDMA, "ib device added: name %s\n", ib_dev->name);
2153 return 0;
2154}
2155
2156static void smb_direct_ib_client_remove(struct ib_device *ib_dev,
2157 void *client_data)
2158{
2159 struct smb_direct_device *smb_dev, *tmp;
2160
2161 write_lock(&smb_direct_device_lock);
2162 list_for_each_entry_safe(smb_dev, tmp, &smb_direct_device_list, list) {
2163 if (smb_dev->ib_dev == ib_dev) {
2164 list_del(&smb_dev->list);
2165 kfree(smb_dev);
2166 break;
2167 }
2168 }
2169 write_unlock(&smb_direct_device_lock);
2170}
2171
2172static struct ib_client smb_direct_ib_client = {
2173 .name = "ksmbd_smb_direct_ib",
2174 .add = smb_direct_ib_client_add,
2175 .remove = smb_direct_ib_client_remove,
2176};
2177
0626e664
NJ
2178int ksmbd_rdma_init(void)
2179{
2180 int ret;
2181
2182 smb_direct_listener.cm_id = NULL;
2183
31928a00
HL
2184 ret = ib_register_client(&smb_direct_ib_client);
2185 if (ret) {
2186 pr_err("failed to ib_register_client\n");
2187 return ret;
2188 }
2189
0626e664
NJ
2190 /* When a client is running out of send credits, the server grants
2191 * credits by sending a packet on this workqueue.
2192 * This avoids the situation where a client cannot send packets
2193 * for lack of credits.
2194 */
2195 smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
070fb21e 2196 WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
0626e664
NJ
2197 if (!smb_direct_wq)
2198 return -ENOMEM;
2199
cb097b3d 2200 ret = smb_direct_listen(smb_direct_port);
0626e664
NJ
2201 if (ret) {
2202 destroy_workqueue(smb_direct_wq);
2203 smb_direct_wq = NULL;
bde1694a 2204 pr_err("Can't listen: %d\n", ret);
0626e664
NJ
2205 return ret;
2206 }
2207
2208 ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
070fb21e 2209 smb_direct_listener.cm_id);
0626e664
NJ
2210 return 0;
2211}
2212
31928a00 2213void ksmbd_rdma_destroy(void)
0626e664 2214{
31928a00
HL
2215 if (!smb_direct_listener.cm_id)
2216 return;
2217
2218 ib_unregister_client(&smb_direct_ib_client);
2219 rdma_destroy_id(smb_direct_listener.cm_id);
2220
0626e664
NJ
2221 smb_direct_listener.cm_id = NULL;
2222
2223 if (smb_direct_wq) {
0626e664
NJ
2224 destroy_workqueue(smb_direct_wq);
2225 smb_direct_wq = NULL;
2226 }
0626e664
NJ
2227}
2228
03d8d4f1
HL
2229bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
2230{
31928a00
HL
2231 struct smb_direct_device *smb_dev;
2232 int i;
03d8d4f1
HL
2233 bool rdma_capable = false;
2234
31928a00
HL
2235 read_lock(&smb_direct_device_lock);
2236 list_for_each_entry(smb_dev, &smb_direct_device_list, list) {
2237 for (i = 0; i < smb_dev->ib_dev->phys_port_cnt; i++) {
2238 struct net_device *ndev;
2239
2240 ndev = smb_dev->ib_dev->ops.get_netdev(smb_dev->ib_dev,
2241 i + 1);
2242 if (!ndev)
2243 continue;
2244
2245 if (ndev == netdev) {
2246 dev_put(ndev);
2247 rdma_capable = true;
2248 goto out;
2249 }
2250 dev_put(ndev);
2251 }
2252 }
2253out:
2254 read_unlock(&smb_direct_device_lock);
2255
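	/*
	 * Fall back to a direct ib_device lookup for devices that are not
	 * on smb_direct_device_list (e.g. drivers without a get_netdev op)
	 * but still support FRWR.
	 */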
2256 if (!rdma_capable) {
2257 struct ib_device *ibdev;
2258
2259 ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
2260 if (ibdev) {
2261 if (rdma_frwr_is_supported(&ibdev->attrs))
2262 rdma_capable = true;
2263 ib_device_put(ibdev);
2264 }
03d8d4f1 2265 }
31928a00 2266
03d8d4f1
HL
2267 return rdma_capable;
2268}
2269
0626e664
NJ
2270static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
2271 .prepare = smb_direct_prepare,
2272 .disconnect = smb_direct_disconnect,
136dff3a 2273 .shutdown = smb_direct_shutdown,
0626e664
NJ
2274 .writev = smb_direct_writev,
2275 .read = smb_direct_read,
2276 .rdma_read = smb_direct_rdma_read,
2277 .rdma_write = smb_direct_rdma_write,
2278};