ksmbd: Fix smb2_get_name() kernel-doc comment
[linux-block.git] / fs / ksmbd / transport_rdma.c
CommitLineData
0626e664
NJ
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2017, Microsoft Corporation.
4 * Copyright (C) 2018, LG Electronics.
5 *
6 * Author(s): Long Li <longli@microsoft.com>,
7 * Hyunchul Lee <hyc.lee@gmail.com>
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 * the GNU General Public License for more details.
18 */
19
20#define SUBMOD_NAME "smb_direct"
21
22#include <linux/kthread.h>
0626e664
NJ
23#include <linux/list.h>
24#include <linux/mempool.h>
25#include <linux/highmem.h>
26#include <linux/scatterlist.h>
27#include <rdma/ib_verbs.h>
28#include <rdma/rdma_cm.h>
29#include <rdma/rw.h>
30
31#include "glob.h"
32#include "connection.h"
33#include "smb_common.h"
34#include "smbstatus.h"
0626e664
NJ
35#include "transport_rdma.h"
36
/* Port number used by this SMB_DIRECT transport */
#define SMB_DIRECT_PORT	5445

/* Protocol version value, already converted to little-endian */
#define SMB_DIRECT_VERSION_LE		cpu_to_le16(0x0100)

/* How long to wait for the SMB_DIRECT negotiation, in seconds */
#define SMB_DIRECT_NEGOTIATE_TIMEOUT		120

/* Scatter/gather entries available per send and per receive request */
#define SMB_DIRECT_MAX_SEND_SGES		8
#define SMB_DIRECT_MAX_RECV_SGES		1

/*
 * Default number of RDMA read/write operations that may be outstanding on
 * a connection. QP creation may lower this to fit the hardware limit.
 */
#define SMB_DIRECT_CM_INITIATOR_DEPTH		8

/* Retry limit for data transfer operations */
#define SMB_DIRECT_CM_RETRY			6
/*
 * Receiver-Not-Ready retries are unnecessary because SMB_DIRECT does its
 * own credit management.
 */
#define SMB_DIRECT_CM_RNR_RETRY		0
57
/*
 * User configurable initial values per SMB_DIRECT transport connection
 * as defined in [MS-SMBD] 3.1.1.1
 * Those may change after a SMB_DIRECT negotiation
 */
/* Upper bound on the credits the local peer grants to the remote peer */
static int smb_direct_receive_credit_max = 255;

/* Number of credits the local peer asks the remote peer to grant */
static int smb_direct_send_credit_target = 255;

/* Largest single message that can be sent to the remote peer */
static int smb_direct_max_send_size = 8192;

/* Largest fragmented upper-layer payload that can be received */
static int smb_direct_max_fragmented_recv_size = 1024 * 1024;

/* Largest single message that can be received */
static int smb_direct_max_receive_size = 8192;

/* NOTE(review): presumably the RDMA read/write size limit -- confirm */
static int smb_direct_max_read_write_size = 1024 * 1024;

/* NOTE(review): presumably caps concurrent RDMA read/write ops -- confirm */
static int smb_direct_max_outstanding_rw_ops = 8;
81
/* Single file-wide listener object; it only carries an RDMA CM id. */
static struct smb_direct_listener {
	struct rdma_cm_id	*cm_id;
} smb_direct_listener;

/*
 * Workqueue on which this file queues the disconnect, send-immediate and
 * post-receive-credits work items.
 */
static struct workqueue_struct *smb_direct_wq;
87
/*
 * Connection state of a smb_direct_transport. A transport starts in
 * SMB_DIRECT_CS_NEW (see alloc_transport()); the disconnect work moves a
 * CONNECTED transport to DISCONNECTING before calling rdma_disconnect().
 */
enum smb_direct_status {
	SMB_DIRECT_CS_NEW = 0,
	SMB_DIRECT_CS_CONNECTED,
	SMB_DIRECT_CS_DISCONNECTING,
	SMB_DIRECT_CS_DISCONNECTED,
};
94
95struct smb_direct_transport {
96 struct ksmbd_transport transport;
97
98 enum smb_direct_status status;
99 bool full_packet_received;
100 wait_queue_head_t wait_status;
101
102 struct rdma_cm_id *cm_id;
103 struct ib_cq *send_cq;
104 struct ib_cq *recv_cq;
105 struct ib_pd *pd;
106 struct ib_qp *qp;
107
108 int max_send_size;
109 int max_recv_size;
110 int max_fragmented_send_size;
111 int max_fragmented_recv_size;
112 int max_rdma_rw_size;
113
114 spinlock_t reassembly_queue_lock;
115 struct list_head reassembly_queue;
116 int reassembly_data_length;
117 int reassembly_queue_length;
118 int first_entry_offset;
119 wait_queue_head_t wait_reassembly_queue;
120
121 spinlock_t receive_credit_lock;
122 int recv_credits;
123 int count_avail_recvmsg;
124 int recv_credit_max;
125 int recv_credit_target;
126
127 spinlock_t recvmsg_queue_lock;
128 struct list_head recvmsg_queue;
129
130 spinlock_t empty_recvmsg_queue_lock;
131 struct list_head empty_recvmsg_queue;
132
133 int send_credit_target;
134 atomic_t send_credits;
135 spinlock_t lock_new_recv_credits;
136 int new_recv_credits;
137 atomic_t rw_avail_ops;
138
139 wait_queue_head_t wait_send_credits;
140 wait_queue_head_t wait_rw_avail_ops;
141
142 mempool_t *sendmsg_mempool;
143 struct kmem_cache *sendmsg_cache;
144 mempool_t *recvmsg_mempool;
145 struct kmem_cache *recvmsg_cache;
146
147 wait_queue_head_t wait_send_payload_pending;
148 atomic_t send_payload_pending;
149 wait_queue_head_t wait_send_pending;
150 atomic_t send_pending;
151
152 struct delayed_work post_recv_credits_work;
153 struct work_struct send_immediate_work;
154 struct work_struct disconnect_work;
155
156 bool negotiation_requested;
157};
158
159#define KSMBD_TRANS(t) ((struct ksmbd_transport *)&((t)->transport))
0626e664
NJ
160
/* Values stored in smb_direct_recvmsg::type to tell recv_done() what to expect */
enum {
	SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
	SMB_DIRECT_MSG_DATA_TRANSFER
};
165
166static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;
167
168struct smb_direct_send_ctx {
169 struct list_head msg_list;
170 int wr_cnt;
171 bool need_invalidate_rkey;
172 unsigned int remote_key;
173};
174
175struct smb_direct_sendmsg {
176 struct smb_direct_transport *transport;
177 struct ib_send_wr wr;
178 struct list_head list;
179 int num_sge;
180 struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES];
181 struct ib_cqe cqe;
182 u8 packet[];
183};
184
185struct smb_direct_recvmsg {
186 struct smb_direct_transport *transport;
187 struct list_head list;
188 int type;
189 struct ib_sge sge;
190 struct ib_cqe cqe;
191 bool first_segment;
192 u8 packet[];
193};
194
195struct smb_direct_rdma_rw_msg {
196 struct smb_direct_transport *t;
197 struct ib_cqe cqe;
198 struct completion *completion;
199 struct rdma_rw_ctx rw_ctx;
200 struct sg_table sgt;
201 struct scatterlist sg_list[0];
202};
203
8ad8dc34
HL
204static inline int get_buf_page_count(void *buf, int size)
205{
206 return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
207 (uintptr_t)buf / PAGE_SIZE;
208}
0626e664
NJ
209
/* Forward declarations of helpers that are used before their definition. */
static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
static void smb_direct_post_recv_credits(struct work_struct *work);
static int smb_direct_post_send_data(struct smb_direct_transport *t,
				     struct smb_direct_send_ctx *send_ctx,
				     struct kvec *iov, int niov,
				     int remaining_data_length);
0626e664 216
02d4b4aa
NJ
217static inline struct smb_direct_transport *
218smb_trans_direct_transfort(struct ksmbd_transport *t)
219{
220 return container_of(t, struct smb_direct_transport, transport);
221}
222
0626e664
NJ
223static inline void
224*smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
225{
226 return (void *)recvmsg->packet;
227}
228
229static inline bool is_receive_credit_post_required(int receive_credits,
070fb21e 230 int avail_recvmsg_count)
0626e664
NJ
231{
232 return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
233 avail_recvmsg_count >= (receive_credits >> 2);
234}
235
236static struct
237smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
238{
239 struct smb_direct_recvmsg *recvmsg = NULL;
240
241 spin_lock(&t->recvmsg_queue_lock);
242 if (!list_empty(&t->recvmsg_queue)) {
243 recvmsg = list_first_entry(&t->recvmsg_queue,
244 struct smb_direct_recvmsg,
245 list);
246 list_del(&recvmsg->list);
247 }
248 spin_unlock(&t->recvmsg_queue_lock);
249 return recvmsg;
250}
251
252static void put_recvmsg(struct smb_direct_transport *t,
070fb21e 253 struct smb_direct_recvmsg *recvmsg)
0626e664
NJ
254{
255 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
070fb21e 256 recvmsg->sge.length, DMA_FROM_DEVICE);
0626e664
NJ
257
258 spin_lock(&t->recvmsg_queue_lock);
259 list_add(&recvmsg->list, &t->recvmsg_queue);
260 spin_unlock(&t->recvmsg_queue_lock);
0626e664
NJ
261}
262
263static struct
264smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
265{
266 struct smb_direct_recvmsg *recvmsg = NULL;
267
268 spin_lock(&t->empty_recvmsg_queue_lock);
269 if (!list_empty(&t->empty_recvmsg_queue)) {
64b39f4a 270 recvmsg = list_first_entry(&t->empty_recvmsg_queue,
070fb21e 271 struct smb_direct_recvmsg, list);
0626e664
NJ
272 list_del(&recvmsg->list);
273 }
274 spin_unlock(&t->empty_recvmsg_queue_lock);
275 return recvmsg;
276}
277
278static void put_empty_recvmsg(struct smb_direct_transport *t,
070fb21e 279 struct smb_direct_recvmsg *recvmsg)
0626e664
NJ
280{
281 ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
070fb21e 282 recvmsg->sge.length, DMA_FROM_DEVICE);
0626e664
NJ
283
284 spin_lock(&t->empty_recvmsg_queue_lock);
285 list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
286 spin_unlock(&t->empty_recvmsg_queue_lock);
287}
288
289static void enqueue_reassembly(struct smb_direct_transport *t,
070fb21e
NJ
290 struct smb_direct_recvmsg *recvmsg,
291 int data_length)
0626e664
NJ
292{
293 spin_lock(&t->reassembly_queue_lock);
294 list_add_tail(&recvmsg->list, &t->reassembly_queue);
295 t->reassembly_queue_length++;
296 /*
297 * Make sure reassembly_data_length is updated after list and
298 * reassembly_queue_length are updated. On the dequeue side
299 * reassembly_data_length is checked without a lock to determine
300 * if reassembly_queue_length and list is up to date
301 */
302 virt_wmb();
303 t->reassembly_data_length += data_length;
304 spin_unlock(&t->reassembly_queue_lock);
0626e664
NJ
305}
306
64b39f4a 307static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
0626e664
NJ
308{
309 if (!list_empty(&t->reassembly_queue))
310 return list_first_entry(&t->reassembly_queue,
311 struct smb_direct_recvmsg, list);
312 else
313 return NULL;
314}
315
316static void smb_direct_disconnect_rdma_work(struct work_struct *work)
317{
318 struct smb_direct_transport *t =
319 container_of(work, struct smb_direct_transport,
320 disconnect_work);
321
322 if (t->status == SMB_DIRECT_CS_CONNECTED) {
323 t->status = SMB_DIRECT_CS_DISCONNECTING;
324 rdma_disconnect(t->cm_id);
325 }
326}
327
328static void
329smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
330{
323b1ea1
HL
331 if (t->status == SMB_DIRECT_CS_CONNECTED)
332 queue_work(smb_direct_wq, &t->disconnect_work);
0626e664
NJ
333}
334
335static void smb_direct_send_immediate_work(struct work_struct *work)
336{
337 struct smb_direct_transport *t = container_of(work,
338 struct smb_direct_transport, send_immediate_work);
339
340 if (t->status != SMB_DIRECT_CS_CONNECTED)
341 return;
342
343 smb_direct_post_send_data(t, NULL, NULL, 0, 0);
344}
345
346static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
347{
348 struct smb_direct_transport *t;
349 struct ksmbd_conn *conn;
350
351 t = kzalloc(sizeof(*t), GFP_KERNEL);
352 if (!t)
353 return NULL;
354
355 t->cm_id = cm_id;
356 cm_id->context = t;
357
358 t->status = SMB_DIRECT_CS_NEW;
359 init_waitqueue_head(&t->wait_status);
360
361 spin_lock_init(&t->reassembly_queue_lock);
362 INIT_LIST_HEAD(&t->reassembly_queue);
363 t->reassembly_data_length = 0;
364 t->reassembly_queue_length = 0;
365 init_waitqueue_head(&t->wait_reassembly_queue);
366 init_waitqueue_head(&t->wait_send_credits);
367 init_waitqueue_head(&t->wait_rw_avail_ops);
368
369 spin_lock_init(&t->receive_credit_lock);
370 spin_lock_init(&t->recvmsg_queue_lock);
371 INIT_LIST_HEAD(&t->recvmsg_queue);
372
373 spin_lock_init(&t->empty_recvmsg_queue_lock);
374 INIT_LIST_HEAD(&t->empty_recvmsg_queue);
375
376 init_waitqueue_head(&t->wait_send_payload_pending);
377 atomic_set(&t->send_payload_pending, 0);
378 init_waitqueue_head(&t->wait_send_pending);
379 atomic_set(&t->send_pending, 0);
380
381 spin_lock_init(&t->lock_new_recv_credits);
382
383 INIT_DELAYED_WORK(&t->post_recv_credits_work,
384 smb_direct_post_recv_credits);
385 INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
386 INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);
387
388 conn = ksmbd_conn_alloc();
389 if (!conn)
390 goto err;
391 conn->transport = KSMBD_TRANS(t);
392 KSMBD_TRANS(t)->conn = conn;
393 KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
394 return t;
395err:
396 kfree(t);
397 return NULL;
398}
399
400static void free_transport(struct smb_direct_transport *t)
401{
402 struct smb_direct_recvmsg *recvmsg;
403
404 wake_up_interruptible(&t->wait_send_credits);
405
406 ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
407 wait_event(t->wait_send_payload_pending,
070fb21e 408 atomic_read(&t->send_payload_pending) == 0);
0626e664 409 wait_event(t->wait_send_pending,
070fb21e 410 atomic_read(&t->send_pending) == 0);
0626e664
NJ
411
412 cancel_work_sync(&t->disconnect_work);
413 cancel_delayed_work_sync(&t->post_recv_credits_work);
414 cancel_work_sync(&t->send_immediate_work);
415
416 if (t->qp) {
417 ib_drain_qp(t->qp);
418 ib_destroy_qp(t->qp);
419 }
420
421 ksmbd_debug(RDMA, "drain the reassembly queue\n");
422 do {
423 spin_lock(&t->reassembly_queue_lock);
424 recvmsg = get_first_reassembly(t);
425 if (recvmsg) {
426 list_del(&recvmsg->list);
64b39f4a 427 spin_unlock(&t->reassembly_queue_lock);
0626e664 428 put_recvmsg(t, recvmsg);
64b39f4a 429 } else {
0626e664 430 spin_unlock(&t->reassembly_queue_lock);
64b39f4a 431 }
0626e664
NJ
432 } while (recvmsg);
433 t->reassembly_data_length = 0;
434
435 if (t->send_cq)
436 ib_free_cq(t->send_cq);
437 if (t->recv_cq)
438 ib_free_cq(t->recv_cq);
439 if (t->pd)
440 ib_dealloc_pd(t->pd);
441 if (t->cm_id)
442 rdma_destroy_id(t->cm_id);
443
444 smb_direct_destroy_pools(t);
445 ksmbd_conn_free(KSMBD_TRANS(t)->conn);
446 kfree(t);
447}
448
449static struct smb_direct_sendmsg
450*smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
451{
452 struct smb_direct_sendmsg *msg;
453
454 msg = mempool_alloc(t->sendmsg_mempool, GFP_KERNEL);
455 if (!msg)
456 return ERR_PTR(-ENOMEM);
457 msg->transport = t;
458 INIT_LIST_HEAD(&msg->list);
459 msg->num_sge = 0;
460 return msg;
461}
462
463static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
070fb21e 464 struct smb_direct_sendmsg *msg)
0626e664
NJ
465{
466 int i;
467
468 if (msg->num_sge > 0) {
469 ib_dma_unmap_single(t->cm_id->device,
070fb21e
NJ
470 msg->sge[0].addr, msg->sge[0].length,
471 DMA_TO_DEVICE);
0626e664
NJ
472 for (i = 1; i < msg->num_sge; i++)
473 ib_dma_unmap_page(t->cm_id->device,
070fb21e
NJ
474 msg->sge[i].addr, msg->sge[i].length,
475 DMA_TO_DEVICE);
0626e664
NJ
476 }
477 mempool_free(msg, t->sendmsg_mempool);
478}
479
480static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
481{
482 switch (recvmsg->type) {
483 case SMB_DIRECT_MSG_DATA_TRANSFER: {
484 struct smb_direct_data_transfer *req =
64b39f4a
NJ
485 (struct smb_direct_data_transfer *)recvmsg->packet;
486 struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
cb451720 487 + le32_to_cpu(req->data_offset));
0626e664 488 ksmbd_debug(RDMA,
070fb21e
NJ
489 "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
490 le16_to_cpu(req->credits_granted),
491 le16_to_cpu(req->credits_requested),
492 req->data_length, req->remaining_data_length,
493 hdr->ProtocolId, hdr->Command);
0626e664
NJ
494 break;
495 }
496 case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
497 struct smb_direct_negotiate_req *req =
498 (struct smb_direct_negotiate_req *)recvmsg->packet;
499 ksmbd_debug(RDMA,
070fb21e
NJ
500 "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
501 le16_to_cpu(req->min_version),
502 le16_to_cpu(req->max_version),
503 le16_to_cpu(req->credits_requested),
504 le32_to_cpu(req->preferred_send_size),
505 le32_to_cpu(req->max_receive_size),
506 le32_to_cpu(req->max_fragmented_size));
0626e664 507 if (le16_to_cpu(req->min_version) > 0x0100 ||
64b39f4a 508 le16_to_cpu(req->max_version) < 0x0100)
0626e664
NJ
509 return -EOPNOTSUPP;
510 if (le16_to_cpu(req->credits_requested) <= 0 ||
64b39f4a
NJ
511 le32_to_cpu(req->max_receive_size) <= 128 ||
512 le32_to_cpu(req->max_fragmented_size) <=
513 128 * 1024)
0626e664
NJ
514 return -ECONNABORTED;
515
516 break;
517 }
518 default:
519 return -EINVAL;
520 }
521 return 0;
522}
523
524static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
525{
526 struct smb_direct_recvmsg *recvmsg;
527 struct smb_direct_transport *t;
528
529 recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
530 t = recvmsg->transport;
531
532 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
533 if (wc->status != IB_WC_WR_FLUSH_ERR) {
bde1694a
NJ
534 pr_err("Recv error. status='%s (%d)' opcode=%d\n",
535 ib_wc_status_msg(wc->status), wc->status,
536 wc->opcode);
0626e664
NJ
537 smb_direct_disconnect_rdma_connection(t);
538 }
539 put_empty_recvmsg(t, recvmsg);
540 return;
541 }
542
543 ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
070fb21e
NJ
544 ib_wc_status_msg(wc->status), wc->status,
545 wc->opcode);
0626e664
NJ
546
547 ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
070fb21e 548 recvmsg->sge.length, DMA_FROM_DEVICE);
0626e664
NJ
549
550 switch (recvmsg->type) {
551 case SMB_DIRECT_MSG_NEGOTIATE_REQ:
2ea086e3
HL
552 if (wc->byte_len < sizeof(struct smb_direct_negotiate_req)) {
553 put_empty_recvmsg(t, recvmsg);
554 return;
555 }
0626e664
NJ
556 t->negotiation_requested = true;
557 t->full_packet_received = true;
558 wake_up_interruptible(&t->wait_status);
559 break;
560 case SMB_DIRECT_MSG_DATA_TRANSFER: {
561 struct smb_direct_data_transfer *data_transfer =
562 (struct smb_direct_data_transfer *)recvmsg->packet;
2ea086e3 563 unsigned int data_length;
0626e664
NJ
564 int avail_recvmsg_count, receive_credits;
565
2ea086e3
HL
566 if (wc->byte_len <
567 offsetof(struct smb_direct_data_transfer, padding)) {
568 put_empty_recvmsg(t, recvmsg);
569 return;
570 }
571
572 data_length = le32_to_cpu(data_transfer->data_length);
0626e664 573 if (data_length) {
2ea086e3
HL
574 if (wc->byte_len < sizeof(struct smb_direct_data_transfer) +
575 (u64)data_length) {
576 put_empty_recvmsg(t, recvmsg);
577 return;
578 }
579
0626e664
NJ
580 if (t->full_packet_received)
581 recvmsg->first_segment = true;
582
583 if (le32_to_cpu(data_transfer->remaining_data_length))
584 t->full_packet_received = false;
585 else
586 t->full_packet_received = true;
587
2ea086e3 588 enqueue_reassembly(t, recvmsg, (int)data_length);
0626e664
NJ
589 wake_up_interruptible(&t->wait_reassembly_queue);
590
591 spin_lock(&t->receive_credit_lock);
592 receive_credits = --(t->recv_credits);
593 avail_recvmsg_count = t->count_avail_recvmsg;
594 spin_unlock(&t->receive_credit_lock);
595 } else {
596 put_empty_recvmsg(t, recvmsg);
597
598 spin_lock(&t->receive_credit_lock);
599 receive_credits = --(t->recv_credits);
600 avail_recvmsg_count = ++(t->count_avail_recvmsg);
601 spin_unlock(&t->receive_credit_lock);
602 }
603
604 t->recv_credit_target =
605 le16_to_cpu(data_transfer->credits_requested);
606 atomic_add(le16_to_cpu(data_transfer->credits_granted),
070fb21e 607 &t->send_credits);
0626e664
NJ
608
609 if (le16_to_cpu(data_transfer->flags) &
070fb21e 610 SMB_DIRECT_RESPONSE_REQUESTED)
0626e664
NJ
611 queue_work(smb_direct_wq, &t->send_immediate_work);
612
613 if (atomic_read(&t->send_credits) > 0)
614 wake_up_interruptible(&t->wait_send_credits);
615
64b39f4a 616 if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
0626e664 617 mod_delayed_work(smb_direct_wq,
070fb21e 618 &t->post_recv_credits_work, 0);
0626e664
NJ
619 break;
620 }
621 default:
622 break;
623 }
624}
625
626static int smb_direct_post_recv(struct smb_direct_transport *t,
070fb21e 627 struct smb_direct_recvmsg *recvmsg)
0626e664
NJ
628{
629 struct ib_recv_wr wr;
630 int ret;
631
632 recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
070fb21e
NJ
633 recvmsg->packet, t->max_recv_size,
634 DMA_FROM_DEVICE);
0626e664
NJ
635 ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
636 if (ret)
637 return ret;
638 recvmsg->sge.length = t->max_recv_size;
639 recvmsg->sge.lkey = t->pd->local_dma_lkey;
640 recvmsg->cqe.done = recv_done;
641
642 wr.wr_cqe = &recvmsg->cqe;
643 wr.next = NULL;
644 wr.sg_list = &recvmsg->sge;
645 wr.num_sge = 1;
646
647 ret = ib_post_recv(t->qp, &wr, NULL);
648 if (ret) {
bde1694a 649 pr_err("Can't post recv: %d\n", ret);
0626e664 650 ib_dma_unmap_single(t->cm_id->device,
070fb21e
NJ
651 recvmsg->sge.addr, recvmsg->sge.length,
652 DMA_FROM_DEVICE);
0626e664
NJ
653 smb_direct_disconnect_rdma_connection(t);
654 return ret;
655 }
656 return ret;
657}
658
659static int smb_direct_read(struct ksmbd_transport *t, char *buf,
070fb21e 660 unsigned int size)
0626e664
NJ
661{
662 struct smb_direct_recvmsg *recvmsg;
663 struct smb_direct_data_transfer *data_transfer;
664 int to_copy, to_read, data_read, offset;
665 u32 data_length, remaining_data_length, data_offset;
666 int rc;
02d4b4aa 667 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
0626e664
NJ
668
669again:
670 if (st->status != SMB_DIRECT_CS_CONNECTED) {
bde1694a 671 pr_err("disconnected\n");
0626e664
NJ
672 return -ENOTCONN;
673 }
674
675 /*
676 * No need to hold the reassembly queue lock all the time as we are
677 * the only one reading from the front of the queue. The transport
678 * may add more entries to the back of the queue at the same time
679 */
680 if (st->reassembly_data_length >= size) {
681 int queue_length;
682 int queue_removed = 0;
683
684 /*
685 * Need to make sure reassembly_data_length is read before
686 * reading reassembly_queue_length and calling
687 * get_first_reassembly. This call is lock free
688 * as we never read at the end of the queue which are being
689 * updated in SOFTIRQ as more data is received
690 */
691 virt_rmb();
692 queue_length = st->reassembly_queue_length;
693 data_read = 0;
694 to_read = size;
695 offset = st->first_entry_offset;
696 while (data_read < size) {
697 recvmsg = get_first_reassembly(st);
698 data_transfer = smb_direct_recvmsg_payload(recvmsg);
699 data_length = le32_to_cpu(data_transfer->data_length);
700 remaining_data_length =
64b39f4a 701 le32_to_cpu(data_transfer->remaining_data_length);
0626e664
NJ
702 data_offset = le32_to_cpu(data_transfer->data_offset);
703
704 /*
705 * The upper layer expects RFC1002 length at the
706 * beginning of the payload. Return it to indicate
707 * the total length of the packet. This minimize the
708 * change to upper layer packet processing logic. This
709 * will be eventually remove when an intermediate
710 * transport layer is added
711 */
712 if (recvmsg->first_segment && size == 4) {
713 unsigned int rfc1002_len =
714 data_length + remaining_data_length;
715 *((__be32 *)buf) = cpu_to_be32(rfc1002_len);
716 data_read = 4;
717 recvmsg->first_segment = false;
718 ksmbd_debug(RDMA,
070fb21e
NJ
719 "returning rfc1002 length %d\n",
720 rfc1002_len);
0626e664
NJ
721 goto read_rfc1002_done;
722 }
723
724 to_copy = min_t(int, data_length - offset, to_read);
64b39f4a 725 memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
070fb21e 726 to_copy);
0626e664
NJ
727
728 /* move on to the next buffer? */
729 if (to_copy == data_length - offset) {
730 queue_length--;
731 /*
732 * No need to lock if we are not at the
733 * end of the queue
734 */
64b39f4a 735 if (queue_length) {
0626e664 736 list_del(&recvmsg->list);
64b39f4a
NJ
737 } else {
738 spin_lock_irq(&st->reassembly_queue_lock);
0626e664 739 list_del(&recvmsg->list);
64b39f4a 740 spin_unlock_irq(&st->reassembly_queue_lock);
0626e664
NJ
741 }
742 queue_removed++;
743 put_recvmsg(st, recvmsg);
744 offset = 0;
64b39f4a 745 } else {
0626e664 746 offset += to_copy;
64b39f4a 747 }
0626e664
NJ
748
749 to_read -= to_copy;
750 data_read += to_copy;
751 }
752
753 spin_lock_irq(&st->reassembly_queue_lock);
754 st->reassembly_data_length -= data_read;
755 st->reassembly_queue_length -= queue_removed;
756 spin_unlock_irq(&st->reassembly_queue_lock);
757
758 spin_lock(&st->receive_credit_lock);
759 st->count_avail_recvmsg += queue_removed;
64b39f4a 760 if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
0626e664
NJ
761 spin_unlock(&st->receive_credit_lock);
762 mod_delayed_work(smb_direct_wq,
070fb21e 763 &st->post_recv_credits_work, 0);
64b39f4a 764 } else {
0626e664 765 spin_unlock(&st->receive_credit_lock);
64b39f4a 766 }
0626e664
NJ
767
768 st->first_entry_offset = offset;
769 ksmbd_debug(RDMA,
070fb21e
NJ
770 "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
771 data_read, st->reassembly_data_length,
772 st->first_entry_offset);
0626e664
NJ
773read_rfc1002_done:
774 return data_read;
775 }
776
777 ksmbd_debug(RDMA, "wait_event on more data\n");
64b39f4a 778 rc = wait_event_interruptible(st->wait_reassembly_queue,
070fb21e
NJ
779 st->reassembly_data_length >= size ||
780 st->status != SMB_DIRECT_CS_CONNECTED);
0626e664
NJ
781 if (rc)
782 return -EINTR;
783
784 goto again;
785}
786
787static void smb_direct_post_recv_credits(struct work_struct *work)
788{
789 struct smb_direct_transport *t = container_of(work,
790 struct smb_direct_transport, post_recv_credits_work.work);
791 struct smb_direct_recvmsg *recvmsg;
792 int receive_credits, credits = 0;
793 int ret;
794 int use_free = 1;
795
796 spin_lock(&t->receive_credit_lock);
797 receive_credits = t->recv_credits;
798 spin_unlock(&t->receive_credit_lock);
799
800 if (receive_credits < t->recv_credit_target) {
801 while (true) {
802 if (use_free)
803 recvmsg = get_free_recvmsg(t);
804 else
805 recvmsg = get_empty_recvmsg(t);
806 if (!recvmsg) {
807 if (use_free) {
808 use_free = 0;
809 continue;
64b39f4a 810 } else {
0626e664 811 break;
64b39f4a 812 }
0626e664
NJ
813 }
814
815 recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
816 recvmsg->first_segment = false;
817
818 ret = smb_direct_post_recv(t, recvmsg);
819 if (ret) {
bde1694a 820 pr_err("Can't post recv: %d\n", ret);
0626e664
NJ
821 put_recvmsg(t, recvmsg);
822 break;
823 }
824 credits++;
825 }
826 }
827
828 spin_lock(&t->receive_credit_lock);
829 t->recv_credits += credits;
830 t->count_avail_recvmsg -= credits;
831 spin_unlock(&t->receive_credit_lock);
832
833 spin_lock(&t->lock_new_recv_credits);
834 t->new_recv_credits += credits;
835 spin_unlock(&t->lock_new_recv_credits);
836
837 if (credits)
838 queue_work(smb_direct_wq, &t->send_immediate_work);
839}
840
841static void send_done(struct ib_cq *cq, struct ib_wc *wc)
842{
843 struct smb_direct_sendmsg *sendmsg, *sibling;
844 struct smb_direct_transport *t;
845 struct list_head *pos, *prev, *end;
846
847 sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
848 t = sendmsg->transport;
849
850 ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
070fb21e
NJ
851 ib_wc_status_msg(wc->status), wc->status,
852 wc->opcode);
0626e664
NJ
853
854 if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
bde1694a
NJ
855 pr_err("Send error. status='%s (%d)', opcode=%d\n",
856 ib_wc_status_msg(wc->status), wc->status,
857 wc->opcode);
0626e664
NJ
858 smb_direct_disconnect_rdma_connection(t);
859 }
860
861 if (sendmsg->num_sge > 1) {
862 if (atomic_dec_and_test(&t->send_payload_pending))
863 wake_up(&t->wait_send_payload_pending);
864 } else {
865 if (atomic_dec_and_test(&t->send_pending))
866 wake_up(&t->wait_send_pending);
867 }
868
869 /* iterate and free the list of messages in reverse. the list's head
870 * is invalid.
871 */
872 for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
070fb21e 873 prev != end; pos = prev, prev = prev->prev) {
0626e664
NJ
874 sibling = container_of(pos, struct smb_direct_sendmsg, list);
875 smb_direct_free_sendmsg(t, sibling);
876 }
877
878 sibling = container_of(pos, struct smb_direct_sendmsg, list);
879 smb_direct_free_sendmsg(t, sibling);
880}
881
882static int manage_credits_prior_sending(struct smb_direct_transport *t)
883{
884 int new_credits;
885
886 spin_lock(&t->lock_new_recv_credits);
887 new_credits = t->new_recv_credits;
888 t->new_recv_credits = 0;
889 spin_unlock(&t->lock_new_recv_credits);
890
891 return new_credits;
892}
893
894static int smb_direct_post_send(struct smb_direct_transport *t,
070fb21e 895 struct ib_send_wr *wr)
0626e664
NJ
896{
897 int ret;
898
899 if (wr->num_sge > 1)
900 atomic_inc(&t->send_payload_pending);
901 else
902 atomic_inc(&t->send_pending);
903
904 ret = ib_post_send(t->qp, wr, NULL);
905 if (ret) {
bde1694a 906 pr_err("failed to post send: %d\n", ret);
0626e664
NJ
907 if (wr->num_sge > 1) {
908 if (atomic_dec_and_test(&t->send_payload_pending))
909 wake_up(&t->wait_send_payload_pending);
910 } else {
911 if (atomic_dec_and_test(&t->send_pending))
912 wake_up(&t->wait_send_pending);
913 }
914 smb_direct_disconnect_rdma_connection(t);
915 }
916 return ret;
917}
918
919static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
070fb21e
NJ
920 struct smb_direct_send_ctx *send_ctx,
921 bool need_invalidate_rkey,
922 unsigned int remote_key)
0626e664
NJ
923{
924 INIT_LIST_HEAD(&send_ctx->msg_list);
925 send_ctx->wr_cnt = 0;
926 send_ctx->need_invalidate_rkey = need_invalidate_rkey;
927 send_ctx->remote_key = remote_key;
928}
929
930static int smb_direct_flush_send_list(struct smb_direct_transport *t,
070fb21e
NJ
931 struct smb_direct_send_ctx *send_ctx,
932 bool is_last)
0626e664
NJ
933{
934 struct smb_direct_sendmsg *first, *last;
935 int ret;
936
937 if (list_empty(&send_ctx->msg_list))
938 return 0;
939
940 first = list_first_entry(&send_ctx->msg_list,
070fb21e
NJ
941 struct smb_direct_sendmsg,
942 list);
0626e664 943 last = list_last_entry(&send_ctx->msg_list,
070fb21e
NJ
944 struct smb_direct_sendmsg,
945 list);
0626e664
NJ
946
947 last->wr.send_flags = IB_SEND_SIGNALED;
948 last->wr.wr_cqe = &last->cqe;
949 if (is_last && send_ctx->need_invalidate_rkey) {
950 last->wr.opcode = IB_WR_SEND_WITH_INV;
951 last->wr.ex.invalidate_rkey = send_ctx->remote_key;
952 }
953
954 ret = smb_direct_post_send(t, &first->wr);
955 if (!ret) {
956 smb_direct_send_ctx_init(t, send_ctx,
070fb21e
NJ
957 send_ctx->need_invalidate_rkey,
958 send_ctx->remote_key);
0626e664
NJ
959 } else {
960 atomic_add(send_ctx->wr_cnt, &t->send_credits);
961 wake_up(&t->wait_send_credits);
962 list_for_each_entry_safe(first, last, &send_ctx->msg_list,
070fb21e 963 list) {
0626e664
NJ
964 smb_direct_free_sendmsg(t, first);
965 }
966 }
967 return ret;
968}
969
970static int wait_for_credits(struct smb_direct_transport *t,
070fb21e 971 wait_queue_head_t *waitq, atomic_t *credits)
0626e664
NJ
972{
973 int ret;
974
975 do {
976 if (atomic_dec_return(credits) >= 0)
977 return 0;
978
979 atomic_inc(credits);
980 ret = wait_event_interruptible(*waitq,
070fb21e
NJ
981 atomic_read(credits) > 0 ||
982 t->status != SMB_DIRECT_CS_CONNECTED);
0626e664
NJ
983
984 if (t->status != SMB_DIRECT_CS_CONNECTED)
985 return -ENOTCONN;
986 else if (ret < 0)
987 return ret;
988 } while (true);
989}
990
991static int wait_for_send_credits(struct smb_direct_transport *t,
070fb21e 992 struct smb_direct_send_ctx *send_ctx)
0626e664
NJ
993{
994 int ret;
995
070fb21e
NJ
996 if (send_ctx &&
997 (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
0626e664
NJ
998 ret = smb_direct_flush_send_list(t, send_ctx, false);
999 if (ret)
1000 return ret;
1001 }
1002
1003 return wait_for_credits(t, &t->wait_send_credits, &t->send_credits);
1004}
1005
1006static int smb_direct_create_header(struct smb_direct_transport *t,
070fb21e
NJ
1007 int size, int remaining_data_length,
1008 struct smb_direct_sendmsg **sendmsg_out)
0626e664
NJ
1009{
1010 struct smb_direct_sendmsg *sendmsg;
1011 struct smb_direct_data_transfer *packet;
1012 int header_length;
1013 int ret;
1014
1015 sendmsg = smb_direct_alloc_sendmsg(t);
8ef32967
DC
1016 if (IS_ERR(sendmsg))
1017 return PTR_ERR(sendmsg);
0626e664
NJ
1018
1019 /* Fill in the packet header */
1020 packet = (struct smb_direct_data_transfer *)sendmsg->packet;
1021 packet->credits_requested = cpu_to_le16(t->send_credit_target);
1022 packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
1023
1024 packet->flags = 0;
1025 packet->reserved = 0;
1026 if (!size)
1027 packet->data_offset = 0;
1028 else
1029 packet->data_offset = cpu_to_le32(24);
1030 packet->data_length = cpu_to_le32(size);
1031 packet->remaining_data_length = cpu_to_le32(remaining_data_length);
1032 packet->padding = 0;
1033
1034 ksmbd_debug(RDMA,
070fb21e
NJ
1035 "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
1036 le16_to_cpu(packet->credits_requested),
1037 le16_to_cpu(packet->credits_granted),
1038 le32_to_cpu(packet->data_offset),
1039 le32_to_cpu(packet->data_length),
1040 le32_to_cpu(packet->remaining_data_length));
0626e664
NJ
1041
1042 /* Map the packet to DMA */
1043 header_length = sizeof(struct smb_direct_data_transfer);
1044 /* If this is a packet without payload, don't send padding */
1045 if (!size)
1046 header_length =
1047 offsetof(struct smb_direct_data_transfer, padding);
1048
1049 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
1050 (void *)packet,
1051 header_length,
1052 DMA_TO_DEVICE);
1053 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
1054 if (ret) {
1055 smb_direct_free_sendmsg(t, sendmsg);
1056 return ret;
1057 }
1058
1059 sendmsg->num_sge = 1;
1060 sendmsg->sge[0].length = header_length;
1061 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1062
1063 *sendmsg_out = sendmsg;
1064 return 0;
1065}
1066
64b39f4a 1067static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
0626e664
NJ
1068{
1069 bool high = is_vmalloc_addr(buf);
1070 struct page *page;
1071 int offset, len;
1072 int i = 0;
1073
8ad8dc34 1074 if (nentries < get_buf_page_count(buf, size))
0626e664
NJ
1075 return -EINVAL;
1076
1077 offset = offset_in_page(buf);
1078 buf -= offset;
1079 while (size > 0) {
1080 len = min_t(int, PAGE_SIZE - offset, size);
1081 if (high)
1082 page = vmalloc_to_page(buf);
1083 else
1084 page = kmap_to_page(buf);
1085
1086 if (!sg_list)
1087 return -EINVAL;
1088 sg_set_page(sg_list, page, len, offset);
1089 sg_list = sg_next(sg_list);
1090
1091 buf += PAGE_SIZE;
1092 size -= len;
1093 offset = 0;
1094 i++;
1095 }
1096 return i;
1097}
1098
1099static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
070fb21e
NJ
1100 struct scatterlist *sg_list, int nentries,
1101 enum dma_data_direction dir)
0626e664
NJ
1102{
1103 int npages;
1104
1105 npages = get_sg_list(buf, size, sg_list, nentries);
1106 if (npages <= 0)
1107 return -EINVAL;
1108 return ib_dma_map_sg(device, sg_list, npages, dir);
1109}
1110
1111static int post_sendmsg(struct smb_direct_transport *t,
070fb21e
NJ
1112 struct smb_direct_send_ctx *send_ctx,
1113 struct smb_direct_sendmsg *msg)
0626e664
NJ
1114{
1115 int i;
1116
1117 for (i = 0; i < msg->num_sge; i++)
1118 ib_dma_sync_single_for_device(t->cm_id->device,
070fb21e
NJ
1119 msg->sge[i].addr, msg->sge[i].length,
1120 DMA_TO_DEVICE);
0626e664
NJ
1121
1122 msg->cqe.done = send_done;
1123 msg->wr.opcode = IB_WR_SEND;
1124 msg->wr.sg_list = &msg->sge[0];
1125 msg->wr.num_sge = msg->num_sge;
1126 msg->wr.next = NULL;
1127
1128 if (send_ctx) {
1129 msg->wr.wr_cqe = NULL;
1130 msg->wr.send_flags = 0;
1131 if (!list_empty(&send_ctx->msg_list)) {
1132 struct smb_direct_sendmsg *last;
1133
1134 last = list_last_entry(&send_ctx->msg_list,
1135 struct smb_direct_sendmsg,
1136 list);
1137 last->wr.next = &msg->wr;
1138 }
1139 list_add_tail(&msg->list, &send_ctx->msg_list);
1140 send_ctx->wr_cnt++;
1141 return 0;
1142 }
1143
1144 msg->wr.wr_cqe = &msg->cqe;
1145 msg->wr.send_flags = IB_SEND_SIGNALED;
1146 return smb_direct_post_send(t, &msg->wr);
1147}
1148
1149static int smb_direct_post_send_data(struct smb_direct_transport *t,
070fb21e
NJ
1150 struct smb_direct_send_ctx *send_ctx,
1151 struct kvec *iov, int niov,
1152 int remaining_data_length)
0626e664
NJ
1153{
1154 int i, j, ret;
1155 struct smb_direct_sendmsg *msg;
1156 int data_length;
64b39f4a 1157 struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];
0626e664
NJ
1158
1159 ret = wait_for_send_credits(t, send_ctx);
1160 if (ret)
1161 return ret;
1162
1163 data_length = 0;
1164 for (i = 0; i < niov; i++)
1165 data_length += iov[i].iov_len;
1166
1167 ret = smb_direct_create_header(t, data_length, remaining_data_length,
1168 &msg);
1169 if (ret) {
1170 atomic_inc(&t->send_credits);
1171 return ret;
1172 }
1173
1174 for (i = 0; i < niov; i++) {
1175 struct ib_sge *sge;
1176 int sg_cnt;
1177
64b39f4a 1178 sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
0626e664 1179 sg_cnt = get_mapped_sg_list(t->cm_id->device,
070fb21e
NJ
1180 iov[i].iov_base, iov[i].iov_len,
1181 sg, SMB_DIRECT_MAX_SEND_SGES - 1,
1182 DMA_TO_DEVICE);
0626e664 1183 if (sg_cnt <= 0) {
bde1694a 1184 pr_err("failed to map buffer\n");
bc3fcc94 1185 ret = -ENOMEM;
0626e664 1186 goto err;
72d6cbb5 1187 } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
bde1694a 1188 pr_err("buffer not fitted into sges\n");
0626e664
NJ
1189 ret = -E2BIG;
1190 ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
1191 DMA_TO_DEVICE);
1192 goto err;
1193 }
1194
1195 for (j = 0; j < sg_cnt; j++) {
1196 sge = &msg->sge[msg->num_sge];
1197 sge->addr = sg_dma_address(&sg[j]);
1198 sge->length = sg_dma_len(&sg[j]);
1199 sge->lkey = t->pd->local_dma_lkey;
1200 msg->num_sge++;
1201 }
1202 }
1203
1204 ret = post_sendmsg(t, send_ctx, msg);
1205 if (ret)
1206 goto err;
1207 return 0;
1208err:
1209 smb_direct_free_sendmsg(t, msg);
1210 atomic_inc(&t->send_credits);
1211 return ret;
1212}
1213
1214static int smb_direct_writev(struct ksmbd_transport *t,
070fb21e
NJ
1215 struct kvec *iov, int niovs, int buflen,
1216 bool need_invalidate, unsigned int remote_key)
0626e664 1217{
02d4b4aa 1218 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
0626e664
NJ
1219 int remaining_data_length;
1220 int start, i, j;
1221 int max_iov_size = st->max_send_size -
1222 sizeof(struct smb_direct_data_transfer);
1223 int ret;
1224 struct kvec vec;
1225 struct smb_direct_send_ctx send_ctx;
1226
b8fc94cd
NJ
1227 if (st->status != SMB_DIRECT_CS_CONNECTED)
1228 return -ENOTCONN;
0626e664
NJ
1229
1230 //FIXME: skip RFC1002 header..
1231 buflen -= 4;
1232 iov[0].iov_base += 4;
1233 iov[0].iov_len -= 4;
1234
1235 remaining_data_length = buflen;
1236 ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
1237
1238 smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
1239 start = i = 0;
1240 buflen = 0;
1241 while (true) {
1242 buflen += iov[i].iov_len;
1243 if (buflen > max_iov_size) {
1244 if (i > start) {
1245 remaining_data_length -=
64b39f4a 1246 (buflen - iov[i].iov_len);
0626e664 1247 ret = smb_direct_post_send_data(st, &send_ctx,
070fb21e
NJ
1248 &iov[start], i - start,
1249 remaining_data_length);
0626e664
NJ
1250 if (ret)
1251 goto done;
1252 } else {
1253 /* iov[start] is too big, break it */
64b39f4a 1254 int nvec = (buflen + max_iov_size - 1) /
0626e664
NJ
1255 max_iov_size;
1256
1257 for (j = 0; j < nvec; j++) {
1258 vec.iov_base =
1259 (char *)iov[start].iov_base +
64b39f4a 1260 j * max_iov_size;
0626e664
NJ
1261 vec.iov_len =
1262 min_t(int, max_iov_size,
070fb21e 1263 buflen - max_iov_size * j);
0626e664 1264 remaining_data_length -= vec.iov_len;
070fb21e
NJ
1265 ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
1266 remaining_data_length);
0626e664
NJ
1267 if (ret)
1268 goto done;
1269 }
1270 i++;
1271 if (i == niovs)
1272 break;
1273 }
1274 start = i;
1275 buflen = 0;
1276 } else {
1277 i++;
1278 if (i == niovs) {
1279 /* send out all remaining vecs */
1280 remaining_data_length -= buflen;
1281 ret = smb_direct_post_send_data(st, &send_ctx,
070fb21e
NJ
1282 &iov[start], i - start,
1283 remaining_data_length);
0626e664
NJ
1284 if (ret)
1285 goto done;
1286 break;
1287 }
1288 }
1289 }
1290
1291done:
1292 ret = smb_direct_flush_send_list(st, &send_ctx, true);
1293
1294 /*
1295 * As an optimization, we don't wait for individual I/O to finish
1296 * before sending the next one.
1297 * Send them all and wait for pending send count to get to 0
1298 * that means all the I/Os have been out and we are good to return
1299 */
1300
1301 wait_event(st->wait_send_payload_pending,
070fb21e 1302 atomic_read(&st->send_payload_pending) == 0);
0626e664
NJ
1303 return ret;
1304}
1305
1306static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
070fb21e 1307 enum dma_data_direction dir)
0626e664
NJ
1308{
1309 struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
070fb21e 1310 struct smb_direct_rdma_rw_msg, cqe);
0626e664
NJ
1311 struct smb_direct_transport *t = msg->t;
1312
1313 if (wc->status != IB_WC_SUCCESS) {
bde1694a
NJ
1314 pr_err("read/write error. opcode = %d, status = %s(%d)\n",
1315 wc->opcode, ib_wc_status_msg(wc->status), wc->status);
0626e664
NJ
1316 smb_direct_disconnect_rdma_connection(t);
1317 }
1318
1319 if (atomic_inc_return(&t->rw_avail_ops) > 0)
1320 wake_up(&t->wait_rw_avail_ops);
1321
1322 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
070fb21e 1323 msg->sg_list, msg->sgt.nents, dir);
0626e664
NJ
1324 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1325 complete(msg->completion);
1326 kfree(msg);
1327}
1328
1329static void read_done(struct ib_cq *cq, struct ib_wc *wc)
1330{
1331 read_write_done(cq, wc, DMA_FROM_DEVICE);
1332}
1333
1334static void write_done(struct ib_cq *cq, struct ib_wc *wc)
1335{
1336 read_write_done(cq, wc, DMA_TO_DEVICE);
1337}
1338
1339static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf,
070fb21e
NJ
1340 int buf_len, u32 remote_key, u64 remote_offset,
1341 u32 remote_len, bool is_read)
0626e664
NJ
1342{
1343 struct smb_direct_rdma_rw_msg *msg;
1344 int ret;
1345 DECLARE_COMPLETION_ONSTACK(completion);
1346 struct ib_send_wr *first_wr = NULL;
1347
1348 ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops);
1349 if (ret < 0)
1350 return ret;
1351
1352 /* TODO: mempool */
1353 msg = kmalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
070fb21e 1354 sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
0626e664
NJ
1355 if (!msg) {
1356 atomic_inc(&t->rw_avail_ops);
1357 return -ENOMEM;
1358 }
1359
1360 msg->sgt.sgl = &msg->sg_list[0];
1361 ret = sg_alloc_table_chained(&msg->sgt,
8ad8dc34 1362 get_buf_page_count(buf, buf_len),
070fb21e 1363 msg->sg_list, SG_CHUNK_SIZE);
0626e664
NJ
1364 if (ret) {
1365 atomic_inc(&t->rw_avail_ops);
1366 kfree(msg);
1367 return -ENOMEM;
1368 }
1369
1370 ret = get_sg_list(buf, buf_len, msg->sgt.sgl, msg->sgt.orig_nents);
1371 if (ret <= 0) {
bde1694a 1372 pr_err("failed to get pages\n");
0626e664
NJ
1373 goto err;
1374 }
1375
1376 ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
8ad8dc34 1377 msg->sg_list, get_buf_page_count(buf, buf_len),
070fb21e
NJ
1378 0, remote_offset, remote_key,
1379 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
0626e664 1380 if (ret < 0) {
bde1694a 1381 pr_err("failed to init rdma_rw_ctx: %d\n", ret);
0626e664
NJ
1382 goto err;
1383 }
1384
1385 msg->t = t;
1386 msg->cqe.done = is_read ? read_done : write_done;
1387 msg->completion = &completion;
1388 first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
070fb21e 1389 &msg->cqe, NULL);
0626e664
NJ
1390
1391 ret = ib_post_send(t->qp, first_wr, NULL);
1392 if (ret) {
bde1694a 1393 pr_err("failed to post send wr: %d\n", ret);
0626e664
NJ
1394 goto err;
1395 }
1396
1397 wait_for_completion(&completion);
1398 return 0;
1399
1400err:
1401 atomic_inc(&t->rw_avail_ops);
1402 if (first_wr)
1403 rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
070fb21e
NJ
1404 msg->sg_list, msg->sgt.nents,
1405 is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
0626e664
NJ
1406 sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
1407 kfree(msg);
1408 return ret;
0626e664
NJ
1409}
1410
64b39f4a 1411static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf,
070fb21e
NJ
1412 unsigned int buflen, u32 remote_key,
1413 u64 remote_offset, u32 remote_len)
0626e664 1414{
02d4b4aa 1415 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
070fb21e
NJ
1416 remote_key, remote_offset,
1417 remote_len, false);
0626e664
NJ
1418}
1419
64b39f4a 1420static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf,
070fb21e
NJ
1421 unsigned int buflen, u32 remote_key,
1422 u64 remote_offset, u32 remote_len)
0626e664 1423{
02d4b4aa 1424 return smb_direct_rdma_xmit(smb_trans_direct_transfort(t), buf, buflen,
070fb21e
NJ
1425 remote_key, remote_offset,
1426 remote_len, true);
0626e664
NJ
1427}
1428
1429static void smb_direct_disconnect(struct ksmbd_transport *t)
1430{
02d4b4aa 1431 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
0626e664
NJ
1432
1433 ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);
1434
323b1ea1 1435 smb_direct_disconnect_rdma_work(&st->disconnect_work);
0626e664 1436 wait_event_interruptible(st->wait_status,
070fb21e 1437 st->status == SMB_DIRECT_CS_DISCONNECTED);
0626e664
NJ
1438 free_transport(st);
1439}
1440
1441static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
070fb21e 1442 struct rdma_cm_event *event)
0626e664
NJ
1443{
1444 struct smb_direct_transport *t = cm_id->context;
1445
1446 ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
070fb21e 1447 cm_id, rdma_event_msg(event->event), event->event);
0626e664
NJ
1448
1449 switch (event->event) {
1450 case RDMA_CM_EVENT_ESTABLISHED: {
1451 t->status = SMB_DIRECT_CS_CONNECTED;
1452 wake_up_interruptible(&t->wait_status);
1453 break;
1454 }
1455 case RDMA_CM_EVENT_DEVICE_REMOVAL:
1456 case RDMA_CM_EVENT_DISCONNECTED: {
1457 t->status = SMB_DIRECT_CS_DISCONNECTED;
1458 wake_up_interruptible(&t->wait_status);
1459 wake_up_interruptible(&t->wait_reassembly_queue);
1460 wake_up(&t->wait_send_credits);
1461 break;
1462 }
1463 case RDMA_CM_EVENT_CONNECT_ERROR: {
1464 t->status = SMB_DIRECT_CS_DISCONNECTED;
1465 wake_up_interruptible(&t->wait_status);
1466 break;
1467 }
1468 default:
bde1694a
NJ
1469 pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
1470 cm_id, rdma_event_msg(event->event),
1471 event->event);
0626e664
NJ
1472 break;
1473 }
1474 return 0;
1475}
1476
1477static void smb_direct_qpair_handler(struct ib_event *event, void *context)
1478{
1479 struct smb_direct_transport *t = context;
1480
1481 ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
070fb21e 1482 t->cm_id, ib_event_msg(event->event), event->event);
0626e664
NJ
1483
1484 switch (event->event) {
1485 case IB_EVENT_CQ_ERR:
1486 case IB_EVENT_QP_FATAL:
1487 smb_direct_disconnect_rdma_connection(t);
1488 break;
1489 default:
1490 break;
1491 }
1492}
1493
1494static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
070fb21e 1495 int failed)
0626e664
NJ
1496{
1497 struct smb_direct_sendmsg *sendmsg;
1498 struct smb_direct_negotiate_resp *resp;
1499 int ret;
1500
1501 sendmsg = smb_direct_alloc_sendmsg(t);
1502 if (IS_ERR(sendmsg))
1503 return -ENOMEM;
1504
1505 resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
1506 if (failed) {
1507 memset(resp, 0, sizeof(*resp));
1508 resp->min_version = cpu_to_le16(0x0100);
1509 resp->max_version = cpu_to_le16(0x0100);
1510 resp->status = STATUS_NOT_SUPPORTED;
1511 } else {
1512 resp->status = STATUS_SUCCESS;
1513 resp->min_version = SMB_DIRECT_VERSION_LE;
1514 resp->max_version = SMB_DIRECT_VERSION_LE;
1515 resp->negotiated_version = SMB_DIRECT_VERSION_LE;
1516 resp->reserved = 0;
1517 resp->credits_requested =
1518 cpu_to_le16(t->send_credit_target);
64b39f4a 1519 resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
0626e664
NJ
1520 resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
1521 resp->preferred_send_size = cpu_to_le32(t->max_send_size);
1522 resp->max_receive_size = cpu_to_le32(t->max_recv_size);
1523 resp->max_fragmented_size =
1524 cpu_to_le32(t->max_fragmented_recv_size);
1525 }
1526
1527 sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
070fb21e
NJ
1528 (void *)resp, sizeof(*resp),
1529 DMA_TO_DEVICE);
1530 ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
0626e664
NJ
1531 if (ret) {
1532 smb_direct_free_sendmsg(t, sendmsg);
1533 return ret;
1534 }
1535
1536 sendmsg->num_sge = 1;
1537 sendmsg->sge[0].length = sizeof(*resp);
1538 sendmsg->sge[0].lkey = t->pd->local_dma_lkey;
1539
1540 ret = post_sendmsg(t, NULL, sendmsg);
1541 if (ret) {
1542 smb_direct_free_sendmsg(t, sendmsg);
1543 return ret;
1544 }
1545
1546 wait_event(t->wait_send_pending,
070fb21e 1547 atomic_read(&t->send_pending) == 0);
0626e664
NJ
1548 return 0;
1549}
1550
1551static int smb_direct_accept_client(struct smb_direct_transport *t)
1552{
1553 struct rdma_conn_param conn_param;
1554 struct ib_port_immutable port_immutable;
1555 u32 ird_ord_hdr[2];
1556 int ret;
1557
1558 memset(&conn_param, 0, sizeof(conn_param));
070fb21e
NJ
1559 conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
1560 SMB_DIRECT_CM_INITIATOR_DEPTH);
0626e664
NJ
1561 conn_param.responder_resources = 0;
1562
1563 t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
070fb21e
NJ
1564 t->cm_id->port_num,
1565 &port_immutable);
0626e664
NJ
1566 if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
1567 ird_ord_hdr[0] = conn_param.responder_resources;
1568 ird_ord_hdr[1] = 1;
1569 conn_param.private_data = ird_ord_hdr;
1570 conn_param.private_data_len = sizeof(ird_ord_hdr);
1571 } else {
1572 conn_param.private_data = NULL;
1573 conn_param.private_data_len = 0;
1574 }
1575 conn_param.retry_count = SMB_DIRECT_CM_RETRY;
1576 conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
1577 conn_param.flow_control = 0;
1578
1579 ret = rdma_accept(t->cm_id, &conn_param);
1580 if (ret) {
bde1694a 1581 pr_err("error at rdma_accept: %d\n", ret);
0626e664
NJ
1582 return ret;
1583 }
1584
1585 wait_event_interruptible(t->wait_status,
1586 t->status != SMB_DIRECT_CS_NEW);
1587 if (t->status != SMB_DIRECT_CS_CONNECTED)
1588 return -ENOTCONN;
1589 return 0;
1590}
1591
1592static int smb_direct_negotiate(struct smb_direct_transport *t)
1593{
1594 int ret;
1595 struct smb_direct_recvmsg *recvmsg;
1596 struct smb_direct_negotiate_req *req;
1597
1598 recvmsg = get_free_recvmsg(t);
1599 if (!recvmsg)
1600 return -ENOMEM;
1601 recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;
1602
1603 ret = smb_direct_post_recv(t, recvmsg);
1604 if (ret) {
bde1694a 1605 pr_err("Can't post recv: %d\n", ret);
0626e664
NJ
1606 goto out;
1607 }
1608
1609 t->negotiation_requested = false;
1610 ret = smb_direct_accept_client(t);
1611 if (ret) {
bde1694a 1612 pr_err("Can't accept client\n");
0626e664
NJ
1613 goto out;
1614 }
1615
1616 smb_direct_post_recv_credits(&t->post_recv_credits_work.work);
1617
1618 ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
1619 ret = wait_event_interruptible_timeout(t->wait_status,
070fb21e
NJ
1620 t->negotiation_requested ||
1621 t->status == SMB_DIRECT_CS_DISCONNECTED,
1622 SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
0626e664
NJ
1623 if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
1624 ret = ret < 0 ? ret : -ETIMEDOUT;
1625 goto out;
1626 }
1627
1628 ret = smb_direct_check_recvmsg(recvmsg);
1629 if (ret == -ECONNABORTED)
1630 goto out;
1631
1632 req = (struct smb_direct_negotiate_req *)recvmsg->packet;
1633 t->max_recv_size = min_t(int, t->max_recv_size,
070fb21e 1634 le32_to_cpu(req->preferred_send_size));
0626e664 1635 t->max_send_size = min_t(int, t->max_send_size,
070fb21e 1636 le32_to_cpu(req->max_receive_size));
0626e664
NJ
1637 t->max_fragmented_send_size =
1638 le32_to_cpu(req->max_fragmented_size);
1639
1640 ret = smb_direct_send_negotiate_response(t, ret);
1641out:
1642 if (recvmsg)
1643 put_recvmsg(t, recvmsg);
1644 return ret;
1645}
1646
1647static int smb_direct_init_params(struct smb_direct_transport *t,
070fb21e 1648 struct ib_qp_cap *cap)
0626e664
NJ
1649{
1650 struct ib_device *device = t->cm_id->device;
1651 int max_send_sges, max_pages, max_rw_wrs, max_send_wrs;
1652
1653 /* need 2 more sge. because a SMB_DIRECT header will be mapped,
1654 * and maybe a send buffer could be not page aligned.
1655 */
1656 t->max_send_size = smb_direct_max_send_size;
1657 max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 2;
1658 if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
bde1694a 1659 pr_err("max_send_size %d is too large\n", t->max_send_size);
0626e664
NJ
1660 return -EINVAL;
1661 }
1662
1663 /*
1664 * allow smb_direct_max_outstanding_rw_ops of in-flight RDMA
1665 * read/writes. HCA guarantees at least max_send_sge of sges for
1666 * a RDMA read/write work request, and if memory registration is used,
1667 * we need reg_mr, local_inv wrs for each read/write.
1668 */
1669 t->max_rdma_rw_size = smb_direct_max_read_write_size;
1670 max_pages = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
1671 max_rw_wrs = DIV_ROUND_UP(max_pages, SMB_DIRECT_MAX_SEND_SGES);
1672 max_rw_wrs += rdma_rw_mr_factor(device, t->cm_id->port_num,
1673 max_pages) * 2;
1674 max_rw_wrs *= smb_direct_max_outstanding_rw_ops;
1675
1676 max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
1677 if (max_send_wrs > device->attrs.max_cqe ||
64b39f4a 1678 max_send_wrs > device->attrs.max_qp_wr) {
bde1694a
NJ
1679 pr_err("consider lowering send_credit_target = %d, or max_outstanding_rw_ops = %d\n",
1680 smb_direct_send_credit_target,
1681 smb_direct_max_outstanding_rw_ops);
1682 pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
1683 device->attrs.max_cqe, device->attrs.max_qp_wr);
0626e664
NJ
1684 return -EINVAL;
1685 }
1686
1687 if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
1688 smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
bde1694a
NJ
1689 pr_err("consider lowering receive_credit_max = %d\n",
1690 smb_direct_receive_credit_max);
1691 pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
1692 device->attrs.max_cqe, device->attrs.max_qp_wr);
0626e664
NJ
1693 return -EINVAL;
1694 }
1695
1696 if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
bde1694a
NJ
1697 pr_err("warning: device max_send_sge = %d too small\n",
1698 device->attrs.max_send_sge);
0626e664
NJ
1699 return -EINVAL;
1700 }
1701 if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
bde1694a
NJ
1702 pr_err("warning: device max_recv_sge = %d too small\n",
1703 device->attrs.max_recv_sge);
0626e664
NJ
1704 return -EINVAL;
1705 }
1706
1707 t->recv_credits = 0;
1708 t->count_avail_recvmsg = 0;
1709
1710 t->recv_credit_max = smb_direct_receive_credit_max;
1711 t->recv_credit_target = 10;
1712 t->new_recv_credits = 0;
1713
1714 t->send_credit_target = smb_direct_send_credit_target;
1715 atomic_set(&t->send_credits, 0);
1716 atomic_set(&t->rw_avail_ops, smb_direct_max_outstanding_rw_ops);
1717
1718 t->max_send_size = smb_direct_max_send_size;
1719 t->max_recv_size = smb_direct_max_receive_size;
1720 t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;
1721
1722 cap->max_send_wr = max_send_wrs;
1723 cap->max_recv_wr = t->recv_credit_max;
1724 cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
1725 cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
1726 cap->max_inline_data = 0;
1727 cap->max_rdma_ctxs = 0;
1728 return 0;
1729}
1730
1731static void smb_direct_destroy_pools(struct smb_direct_transport *t)
1732{
1733 struct smb_direct_recvmsg *recvmsg;
1734
1735 while ((recvmsg = get_free_recvmsg(t)))
1736 mempool_free(recvmsg, t->recvmsg_mempool);
1737 while ((recvmsg = get_empty_recvmsg(t)))
1738 mempool_free(recvmsg, t->recvmsg_mempool);
1739
1740 mempool_destroy(t->recvmsg_mempool);
1741 t->recvmsg_mempool = NULL;
1742
1743 kmem_cache_destroy(t->recvmsg_cache);
1744 t->recvmsg_cache = NULL;
1745
1746 mempool_destroy(t->sendmsg_mempool);
1747 t->sendmsg_mempool = NULL;
1748
1749 kmem_cache_destroy(t->sendmsg_cache);
1750 t->sendmsg_cache = NULL;
1751}
1752
1753static int smb_direct_create_pools(struct smb_direct_transport *t)
1754{
1755 char name[80];
1756 int i;
1757 struct smb_direct_recvmsg *recvmsg;
1758
1759 snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
1760 t->sendmsg_cache = kmem_cache_create(name,
070fb21e
NJ
1761 sizeof(struct smb_direct_sendmsg) +
1762 sizeof(struct smb_direct_negotiate_resp),
1763 0, SLAB_HWCACHE_ALIGN, NULL);
0626e664
NJ
1764 if (!t->sendmsg_cache)
1765 return -ENOMEM;
1766
1767 t->sendmsg_mempool = mempool_create(t->send_credit_target,
070fb21e
NJ
1768 mempool_alloc_slab, mempool_free_slab,
1769 t->sendmsg_cache);
0626e664
NJ
1770 if (!t->sendmsg_mempool)
1771 goto err;
1772
1773 snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
1774 t->recvmsg_cache = kmem_cache_create(name,
070fb21e
NJ
1775 sizeof(struct smb_direct_recvmsg) +
1776 t->max_recv_size,
1777 0, SLAB_HWCACHE_ALIGN, NULL);
0626e664
NJ
1778 if (!t->recvmsg_cache)
1779 goto err;
1780
1781 t->recvmsg_mempool =
1782 mempool_create(t->recv_credit_max, mempool_alloc_slab,
070fb21e 1783 mempool_free_slab, t->recvmsg_cache);
0626e664
NJ
1784 if (!t->recvmsg_mempool)
1785 goto err;
1786
1787 INIT_LIST_HEAD(&t->recvmsg_queue);
1788
1789 for (i = 0; i < t->recv_credit_max; i++) {
1790 recvmsg = mempool_alloc(t->recvmsg_mempool, GFP_KERNEL);
1791 if (!recvmsg)
1792 goto err;
1793 recvmsg->transport = t;
1794 list_add(&recvmsg->list, &t->recvmsg_queue);
1795 }
1796 t->count_avail_recvmsg = t->recv_credit_max;
1797
1798 return 0;
1799err:
1800 smb_direct_destroy_pools(t);
1801 return -ENOMEM;
1802}
1803
1804static int smb_direct_create_qpair(struct smb_direct_transport *t,
070fb21e 1805 struct ib_qp_cap *cap)
0626e664
NJ
1806{
1807 int ret;
1808 struct ib_qp_init_attr qp_attr;
1809
1810 t->pd = ib_alloc_pd(t->cm_id->device, 0);
1811 if (IS_ERR(t->pd)) {
bde1694a 1812 pr_err("Can't create RDMA PD\n");
0626e664
NJ
1813 ret = PTR_ERR(t->pd);
1814 t->pd = NULL;
1815 return ret;
1816 }
1817
1818 t->send_cq = ib_alloc_cq(t->cm_id->device, t,
070fb21e 1819 t->send_credit_target, 0, IB_POLL_WORKQUEUE);
0626e664 1820 if (IS_ERR(t->send_cq)) {
bde1694a 1821 pr_err("Can't create RDMA send CQ\n");
0626e664
NJ
1822 ret = PTR_ERR(t->send_cq);
1823 t->send_cq = NULL;
1824 goto err;
1825 }
1826
1827 t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
070fb21e
NJ
1828 cap->max_send_wr + cap->max_rdma_ctxs,
1829 0, IB_POLL_WORKQUEUE);
0626e664 1830 if (IS_ERR(t->recv_cq)) {
bde1694a 1831 pr_err("Can't create RDMA recv CQ\n");
0626e664
NJ
1832 ret = PTR_ERR(t->recv_cq);
1833 t->recv_cq = NULL;
1834 goto err;
1835 }
1836
1837 memset(&qp_attr, 0, sizeof(qp_attr));
1838 qp_attr.event_handler = smb_direct_qpair_handler;
1839 qp_attr.qp_context = t;
1840 qp_attr.cap = *cap;
1841 qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
1842 qp_attr.qp_type = IB_QPT_RC;
1843 qp_attr.send_cq = t->send_cq;
1844 qp_attr.recv_cq = t->recv_cq;
1845 qp_attr.port_num = ~0;
1846
1847 ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
1848 if (ret) {
bde1694a 1849 pr_err("Can't create RDMA QP: %d\n", ret);
0626e664
NJ
1850 goto err;
1851 }
1852
1853 t->qp = t->cm_id->qp;
1854 t->cm_id->event_handler = smb_direct_cm_handler;
1855
1856 return 0;
1857err:
1858 if (t->qp) {
1859 ib_destroy_qp(t->qp);
1860 t->qp = NULL;
1861 }
1862 if (t->recv_cq) {
1863 ib_destroy_cq(t->recv_cq);
1864 t->recv_cq = NULL;
1865 }
1866 if (t->send_cq) {
1867 ib_destroy_cq(t->send_cq);
1868 t->send_cq = NULL;
1869 }
1870 if (t->pd) {
1871 ib_dealloc_pd(t->pd);
1872 t->pd = NULL;
1873 }
1874 return ret;
1875}
1876
1877static int smb_direct_prepare(struct ksmbd_transport *t)
1878{
02d4b4aa 1879 struct smb_direct_transport *st = smb_trans_direct_transfort(t);
0626e664
NJ
1880 int ret;
1881 struct ib_qp_cap qp_cap;
1882
1883 ret = smb_direct_init_params(st, &qp_cap);
1884 if (ret) {
bde1694a 1885 pr_err("Can't configure RDMA parameters\n");
0626e664
NJ
1886 return ret;
1887 }
1888
1889 ret = smb_direct_create_pools(st);
1890 if (ret) {
bde1694a 1891 pr_err("Can't init RDMA pool: %d\n", ret);
0626e664
NJ
1892 return ret;
1893 }
1894
1895 ret = smb_direct_create_qpair(st, &qp_cap);
1896 if (ret) {
bde1694a 1897 pr_err("Can't accept RDMA client: %d\n", ret);
0626e664
NJ
1898 return ret;
1899 }
1900
1901 ret = smb_direct_negotiate(st);
1902 if (ret) {
bde1694a 1903 pr_err("Can't negotiate: %d\n", ret);
0626e664
NJ
1904 return ret;
1905 }
1906
1907 st->status = SMB_DIRECT_CS_CONNECTED;
1908 return 0;
1909}
1910
1911static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
1912{
1913 if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
1914 return false;
1915 if (attrs->max_fast_reg_page_list_len == 0)
1916 return false;
1917 return true;
1918}
1919
1920static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
1921{
1922 struct smb_direct_transport *t;
1923
1924 if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
1925 ksmbd_debug(RDMA,
070fb21e
NJ
1926 "Fast Registration Work Requests is not supported. device capabilities=%llx\n",
1927 new_cm_id->device->attrs.device_cap_flags);
0626e664
NJ
1928 return -EPROTONOSUPPORT;
1929 }
1930
1931 t = alloc_transport(new_cm_id);
1932 if (!t)
1933 return -ENOMEM;
1934
1935 KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
070fb21e
NJ
1936 KSMBD_TRANS(t)->conn, "ksmbd:r%u",
1937 SMB_DIRECT_PORT);
0626e664
NJ
1938 if (IS_ERR(KSMBD_TRANS(t)->handler)) {
1939 int ret = PTR_ERR(KSMBD_TRANS(t)->handler);
1940
bde1694a 1941 pr_err("Can't start thread\n");
0626e664
NJ
1942 free_transport(t);
1943 return ret;
1944 }
1945
1946 return 0;
1947}
1948
1949static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
070fb21e 1950 struct rdma_cm_event *event)
0626e664
NJ
1951{
1952 switch (event->event) {
1953 case RDMA_CM_EVENT_CONNECT_REQUEST: {
1954 int ret = smb_direct_handle_connect_request(cm_id);
1955
1956 if (ret) {
bde1694a 1957 pr_err("Can't create transport: %d\n", ret);
0626e664
NJ
1958 return ret;
1959 }
1960
1961 ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
070fb21e 1962 cm_id);
0626e664
NJ
1963 break;
1964 }
1965 default:
bde1694a
NJ
1966 pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
1967 cm_id, rdma_event_msg(event->event), event->event);
0626e664
NJ
1968 break;
1969 }
1970 return 0;
1971}
1972
1973static int smb_direct_listen(int port)
1974{
1975 int ret;
1976 struct rdma_cm_id *cm_id;
1977 struct sockaddr_in sin = {
1978 .sin_family = AF_INET,
1979 .sin_addr.s_addr = htonl(INADDR_ANY),
1980 .sin_port = htons(port),
1981 };
1982
1983 cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
070fb21e 1984 &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
0626e664 1985 if (IS_ERR(cm_id)) {
bde1694a 1986 pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
0626e664
NJ
1987 return PTR_ERR(cm_id);
1988 }
1989
1990 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
1991 if (ret) {
bde1694a 1992 pr_err("Can't bind: %d\n", ret);
0626e664
NJ
1993 goto err;
1994 }
1995
1996 smb_direct_listener.cm_id = cm_id;
1997
1998 ret = rdma_listen(cm_id, 10);
1999 if (ret) {
bde1694a 2000 pr_err("Can't listen: %d\n", ret);
0626e664
NJ
2001 goto err;
2002 }
2003 return 0;
2004err:
2005 smb_direct_listener.cm_id = NULL;
2006 rdma_destroy_id(cm_id);
2007 return ret;
2008}
2009
2010int ksmbd_rdma_init(void)
2011{
2012 int ret;
2013
2014 smb_direct_listener.cm_id = NULL;
2015
2016 /* When a client is running out of send credits, the credits are
2017 * granted by the server's sending a packet using this queue.
2018 * This avoids the situation that a clients cannot send packets
2019 * for lack of credits
2020 */
2021 smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
070fb21e 2022 WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
0626e664
NJ
2023 if (!smb_direct_wq)
2024 return -ENOMEM;
2025
2026 ret = smb_direct_listen(SMB_DIRECT_PORT);
2027 if (ret) {
2028 destroy_workqueue(smb_direct_wq);
2029 smb_direct_wq = NULL;
bde1694a 2030 pr_err("Can't listen: %d\n", ret);
0626e664
NJ
2031 return ret;
2032 }
2033
2034 ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
070fb21e 2035 smb_direct_listener.cm_id);
0626e664
NJ
2036 return 0;
2037}
2038
2039int ksmbd_rdma_destroy(void)
2040{
2041 if (smb_direct_listener.cm_id)
2042 rdma_destroy_id(smb_direct_listener.cm_id);
2043 smb_direct_listener.cm_id = NULL;
2044
2045 if (smb_direct_wq) {
0626e664
NJ
2046 destroy_workqueue(smb_direct_wq);
2047 smb_direct_wq = NULL;
2048 }
2049 return 0;
2050}
2051
03d8d4f1
HL
2052bool ksmbd_rdma_capable_netdev(struct net_device *netdev)
2053{
2054 struct ib_device *ibdev;
2055 bool rdma_capable = false;
2056
2057 ibdev = ib_device_get_by_netdev(netdev, RDMA_DRIVER_UNKNOWN);
2058 if (ibdev) {
2059 if (rdma_frwr_is_supported(&ibdev->attrs))
2060 rdma_capable = true;
2061 ib_device_put(ibdev);
2062 }
2063 return rdma_capable;
2064}
2065
0626e664
NJ
2066static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
2067 .prepare = smb_direct_prepare,
2068 .disconnect = smb_direct_disconnect,
2069 .writev = smb_direct_writev,
2070 .read = smb_direct_read,
2071 .rdma_read = smb_direct_rdma_read,
2072 .rdma_write = smb_direct_rdma_write,
2073};