// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017, Microsoft Corporation.
 * Copyright (C) 2018, LG Electronics.
 *
 * Author(s): Long Li <longli@microsoft.com>,
 *	      Hyunchul Lee <hyc.lee@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 * the GNU General Public License for more details.
 */

#define SUBMOD_NAME "smb_direct"

#include <linux/kthread.h>
#include <linux/rwlock.h>
#include <linux/list.h>
#include <linux/mempool.h>
#include <linux/highmem.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
#include <rdma/rw.h>

#include "glob.h"
#include "connection.h"
#include "smb_common.h"
#include "smbstatus.h"
#include "transport_rdma.h"

#define SMB_DIRECT_PORT 5445

#define SMB_DIRECT_VERSION_LE cpu_to_le16(0x0100)

/* SMB_DIRECT negotiation timeout in seconds */
#define SMB_DIRECT_NEGOTIATE_TIMEOUT 120

#define SMB_DIRECT_MAX_SEND_SGES 8
#define SMB_DIRECT_MAX_RECV_SGES 1

/*
 * Default maximum number of RDMA read/write operations outstanding on
 * this connection. This value may be decreased during QP creation to
 * honor hardware limits.
 */
#define SMB_DIRECT_CM_INITIATOR_DEPTH 8

/* Maximum number of retries on data transfer operations */
#define SMB_DIRECT_CM_RETRY 6
/* No need to retry on Receiver Not Ready since SMB_DIRECT manages credits */
#define SMB_DIRECT_CM_RNR_RETRY 0

/*
 * User-configurable initial values per SMB_DIRECT transport connection
 * as defined in [MS-SMBD] 3.1.1.1.
 * Those may change after a SMB_DIRECT negotiation.
 */
/* The local peer's maximum number of credits to grant to the peer */
static int smb_direct_receive_credit_max = 255;

/* The number of send credits the local peer requests from the remote peer */
static int smb_direct_send_credit_target = 255;

/* The maximum single-message size that can be sent to the remote peer */
static int smb_direct_max_send_size = 8192;

/* The maximum fragmented upper-layer payload receive size supported */
static int smb_direct_max_fragmented_recv_size = 1024 * 1024;

/* The maximum single-message size that can be received */
static int smb_direct_max_receive_size = 8192;

static int smb_direct_max_read_write_size = 1024 * 1024;

static int smb_direct_max_outstanding_rw_ops = 8;

static struct smb_direct_listener {
	struct rdma_cm_id *cm_id;
} smb_direct_listener;

static struct workqueue_struct *smb_direct_wq;

enum smb_direct_status {
	SMB_DIRECT_CS_NEW = 0,
	SMB_DIRECT_CS_CONNECTED,
	SMB_DIRECT_CS_DISCONNECTING,
	SMB_DIRECT_CS_DISCONNECTED,
};

struct smb_direct_transport {
	struct ksmbd_transport transport;

	enum smb_direct_status status;
	bool full_packet_received;
	wait_queue_head_t wait_status;

	struct rdma_cm_id *cm_id;
	struct ib_cq *send_cq;
	struct ib_cq *recv_cq;
	struct ib_pd *pd;
	struct ib_qp *qp;

	int max_send_size;
	int max_recv_size;
	int max_fragmented_send_size;
	int max_fragmented_recv_size;
	int max_rdma_rw_size;

	spinlock_t reassembly_queue_lock;
	struct list_head reassembly_queue;
	int reassembly_data_length;
	int reassembly_queue_length;
	int first_entry_offset;
	wait_queue_head_t wait_reassembly_queue;

	spinlock_t receive_credit_lock;
	int recv_credits;
	int count_avail_recvmsg;
	int recv_credit_max;
	int recv_credit_target;

	spinlock_t recvmsg_queue_lock;
	struct list_head recvmsg_queue;

	spinlock_t empty_recvmsg_queue_lock;
	struct list_head empty_recvmsg_queue;

	int send_credit_target;
	atomic_t send_credits;
	spinlock_t lock_new_recv_credits;
	int new_recv_credits;
	atomic_t rw_avail_ops;

	wait_queue_head_t wait_send_credits;
	wait_queue_head_t wait_rw_avail_ops;

	mempool_t *sendmsg_mempool;
	struct kmem_cache *sendmsg_cache;
	mempool_t *recvmsg_mempool;
	struct kmem_cache *recvmsg_cache;

	wait_queue_head_t wait_send_payload_pending;
	atomic_t send_payload_pending;
	wait_queue_head_t wait_send_pending;
	atomic_t send_pending;

	struct delayed_work post_recv_credits_work;
	struct work_struct send_immediate_work;
	struct work_struct disconnect_work;

	bool negotiation_requested;
};

#define KSMBD_TRANS(t)		((struct ksmbd_transport *)&((t)->transport))
#define SMB_DIRECT_TRANS(t)	((struct smb_direct_transport *)container_of(t, \
				struct smb_direct_transport, transport))

enum {
	SMB_DIRECT_MSG_NEGOTIATE_REQ = 0,
	SMB_DIRECT_MSG_DATA_TRANSFER
};

static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops;

struct smb_direct_send_ctx {
	struct list_head msg_list;
	int wr_cnt;
	bool need_invalidate_rkey;
	unsigned int remote_key;
};

struct smb_direct_sendmsg {
	struct smb_direct_transport *transport;
	struct ib_send_wr wr;
	struct list_head list;
	int num_sge;
	struct ib_sge sge[SMB_DIRECT_MAX_SEND_SGES];
	struct ib_cqe cqe;
	u8 packet[];
};

struct smb_direct_recvmsg {
	struct smb_direct_transport *transport;
	struct list_head list;
	int type;
	struct ib_sge sge;
	struct ib_cqe cqe;
	bool first_segment;
	u8 packet[];
};

struct smb_direct_rdma_rw_msg {
	struct smb_direct_transport *t;
	struct ib_cqe cqe;
	struct completion *completion;
	struct rdma_rw_ctx rw_ctx;
	struct sg_table sgt;
	struct scatterlist sg_list[];
};

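/*
 * Return the number of pages spanned by the virtually contiguous
 * region [buf, buf + size), counting partial first and last pages.
 */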
static inline int get_buf_page_count(void *buf, int size)
{
	return DIV_ROUND_UP((uintptr_t)buf + size, PAGE_SIZE) -
		(uintptr_t)buf / PAGE_SIZE;
}

static void smb_direct_destroy_pools(struct smb_direct_transport *transport);
static void smb_direct_post_recv_credits(struct work_struct *work);
static int smb_direct_post_send_data(struct smb_direct_transport *t,
				     struct smb_direct_send_ctx *send_ctx,
				     struct kvec *iov, int niov,
				     int remaining_data_length);

static inline void
*smb_direct_recvmsg_payload(struct smb_direct_recvmsg *recvmsg)
{
	return (void *)recvmsg->packet;
}

static inline bool is_receive_credit_post_required(int receive_credits,
						   int avail_recvmsg_count)
{
	return receive_credits <= (smb_direct_receive_credit_max >> 3) &&
		avail_recvmsg_count >= (receive_credits >> 2);
}

static struct
smb_direct_recvmsg *get_free_recvmsg(struct smb_direct_transport *t)
{
	struct smb_direct_recvmsg *recvmsg = NULL;

	spin_lock(&t->recvmsg_queue_lock);
	if (!list_empty(&t->recvmsg_queue)) {
		recvmsg = list_first_entry(&t->recvmsg_queue,
					   struct smb_direct_recvmsg,
					   list);
		list_del(&recvmsg->list);
	}
	spin_unlock(&t->recvmsg_queue_lock);
	return recvmsg;
}

static void put_recvmsg(struct smb_direct_transport *t,
			struct smb_direct_recvmsg *recvmsg)
{
	ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
			    recvmsg->sge.length, DMA_FROM_DEVICE);

	spin_lock(&t->recvmsg_queue_lock);
	list_add(&recvmsg->list, &t->recvmsg_queue);
	spin_unlock(&t->recvmsg_queue_lock);
}

static struct
smb_direct_recvmsg *get_empty_recvmsg(struct smb_direct_transport *t)
{
	struct smb_direct_recvmsg *recvmsg = NULL;

	spin_lock(&t->empty_recvmsg_queue_lock);
	if (!list_empty(&t->empty_recvmsg_queue)) {
		recvmsg = list_first_entry(&t->empty_recvmsg_queue,
					   struct smb_direct_recvmsg, list);
		list_del(&recvmsg->list);
	}
	spin_unlock(&t->empty_recvmsg_queue_lock);
	return recvmsg;
}

static void put_empty_recvmsg(struct smb_direct_transport *t,
			      struct smb_direct_recvmsg *recvmsg)
{
	ib_dma_unmap_single(t->cm_id->device, recvmsg->sge.addr,
			    recvmsg->sge.length, DMA_FROM_DEVICE);

	spin_lock(&t->empty_recvmsg_queue_lock);
	list_add_tail(&recvmsg->list, &t->empty_recvmsg_queue);
	spin_unlock(&t->empty_recvmsg_queue_lock);
}

static void enqueue_reassembly(struct smb_direct_transport *t,
			       struct smb_direct_recvmsg *recvmsg,
			       int data_length)
{
	spin_lock(&t->reassembly_queue_lock);
	list_add_tail(&recvmsg->list, &t->reassembly_queue);
	t->reassembly_queue_length++;
	/*
	 * Make sure reassembly_data_length is updated after list and
	 * reassembly_queue_length are updated. On the dequeue side
	 * reassembly_data_length is checked without a lock to determine
	 * if reassembly_queue_length and the list are up to date.
	 */
	virt_wmb();
	t->reassembly_data_length += data_length;
	spin_unlock(&t->reassembly_queue_lock);
}

static struct smb_direct_recvmsg *get_first_reassembly(struct smb_direct_transport *t)
{
	if (!list_empty(&t->reassembly_queue))
		return list_first_entry(&t->reassembly_queue,
				struct smb_direct_recvmsg, list);
	else
		return NULL;
}

static void smb_direct_disconnect_rdma_work(struct work_struct *work)
{
	struct smb_direct_transport *t =
		container_of(work, struct smb_direct_transport,
			     disconnect_work);

	if (t->status == SMB_DIRECT_CS_CONNECTED) {
		t->status = SMB_DIRECT_CS_DISCONNECTING;
		rdma_disconnect(t->cm_id);
	}
}

static void
smb_direct_disconnect_rdma_connection(struct smb_direct_transport *t)
{
	queue_work(smb_direct_wq, &t->disconnect_work);
}

static void smb_direct_send_immediate_work(struct work_struct *work)
{
	struct smb_direct_transport *t = container_of(work,
			struct smb_direct_transport, send_immediate_work);

	if (t->status != SMB_DIRECT_CS_CONNECTED)
		return;

	smb_direct_post_send_data(t, NULL, NULL, 0, 0);
}

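/*
 * Allocate and initialize a transport for a new RDMA connection:
 * queues, locks, wait queues, and work items start out empty, and a
 * ksmbd_conn is attached so the connection handler loop can run on it.
 */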
static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
{
	struct smb_direct_transport *t;
	struct ksmbd_conn *conn;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return NULL;

	t->cm_id = cm_id;
	cm_id->context = t;

	t->status = SMB_DIRECT_CS_NEW;
	init_waitqueue_head(&t->wait_status);

	spin_lock_init(&t->reassembly_queue_lock);
	INIT_LIST_HEAD(&t->reassembly_queue);
	t->reassembly_data_length = 0;
	t->reassembly_queue_length = 0;
	init_waitqueue_head(&t->wait_reassembly_queue);
	init_waitqueue_head(&t->wait_send_credits);
	init_waitqueue_head(&t->wait_rw_avail_ops);

	spin_lock_init(&t->receive_credit_lock);
	spin_lock_init(&t->recvmsg_queue_lock);
	INIT_LIST_HEAD(&t->recvmsg_queue);

	spin_lock_init(&t->empty_recvmsg_queue_lock);
	INIT_LIST_HEAD(&t->empty_recvmsg_queue);

	init_waitqueue_head(&t->wait_send_payload_pending);
	atomic_set(&t->send_payload_pending, 0);
	init_waitqueue_head(&t->wait_send_pending);
	atomic_set(&t->send_pending, 0);

	spin_lock_init(&t->lock_new_recv_credits);

	INIT_DELAYED_WORK(&t->post_recv_credits_work,
			  smb_direct_post_recv_credits);
	INIT_WORK(&t->send_immediate_work, smb_direct_send_immediate_work);
	INIT_WORK(&t->disconnect_work, smb_direct_disconnect_rdma_work);

	conn = ksmbd_conn_alloc();
	if (!conn)
		goto err;
	conn->transport = KSMBD_TRANS(t);
	KSMBD_TRANS(t)->conn = conn;
	KSMBD_TRANS(t)->ops = &ksmbd_smb_direct_transport_ops;
	return t;
err:
	kfree(t);
	return NULL;
}

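/*
 * Tear down a transport: wait for posted sends to complete, cancel
 * pending work, drain and destroy the QP, flush the reassembly queue,
 * and release the CQs, PD, cm_id, memory pools, and the connection.
 */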
static void free_transport(struct smb_direct_transport *t)
{
	struct smb_direct_recvmsg *recvmsg;

	wake_up_interruptible(&t->wait_send_credits);

	ksmbd_debug(RDMA, "wait for all send posted to IB to finish\n");
	wait_event(t->wait_send_payload_pending,
		   atomic_read(&t->send_payload_pending) == 0);
	wait_event(t->wait_send_pending,
		   atomic_read(&t->send_pending) == 0);

	cancel_work_sync(&t->disconnect_work);
	cancel_delayed_work_sync(&t->post_recv_credits_work);
	cancel_work_sync(&t->send_immediate_work);

	if (t->qp) {
		ib_drain_qp(t->qp);
		ib_destroy_qp(t->qp);
	}

	ksmbd_debug(RDMA, "drain the reassembly queue\n");
	do {
		spin_lock(&t->reassembly_queue_lock);
		recvmsg = get_first_reassembly(t);
		if (recvmsg) {
			list_del(&recvmsg->list);
			spin_unlock(&t->reassembly_queue_lock);
			put_recvmsg(t, recvmsg);
		} else {
			spin_unlock(&t->reassembly_queue_lock);
		}
	} while (recvmsg);
	t->reassembly_data_length = 0;

	if (t->send_cq)
		ib_free_cq(t->send_cq);
	if (t->recv_cq)
		ib_free_cq(t->recv_cq);
	if (t->pd)
		ib_dealloc_pd(t->pd);
	if (t->cm_id)
		rdma_destroy_id(t->cm_id);

	smb_direct_destroy_pools(t);
	ksmbd_conn_free(KSMBD_TRANS(t)->conn);
	kfree(t);
}

static struct smb_direct_sendmsg
*smb_direct_alloc_sendmsg(struct smb_direct_transport *t)
{
	struct smb_direct_sendmsg *msg;

	msg = mempool_alloc(t->sendmsg_mempool, GFP_KERNEL);
	if (!msg)
		return ERR_PTR(-ENOMEM);
	msg->transport = t;
	INIT_LIST_HEAD(&msg->list);
	msg->num_sge = 0;
	return msg;
}

static void smb_direct_free_sendmsg(struct smb_direct_transport *t,
				    struct smb_direct_sendmsg *msg)
{
	int i;

	if (msg->num_sge > 0) {
		ib_dma_unmap_single(t->cm_id->device,
				    msg->sge[0].addr, msg->sge[0].length,
				    DMA_TO_DEVICE);
		for (i = 1; i < msg->num_sge; i++)
			ib_dma_unmap_page(t->cm_id->device,
					  msg->sge[i].addr, msg->sge[i].length,
					  DMA_TO_DEVICE);
	}
	mempool_free(msg, t->sendmsg_mempool);
}

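/*
 * Sanity-check a received message. For a negotiate request, also
 * validate the advertised version and sizes; -ECONNABORTED makes the
 * caller drop the connection instead of sending an error response.
 */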
static int smb_direct_check_recvmsg(struct smb_direct_recvmsg *recvmsg)
{
	switch (recvmsg->type) {
	case SMB_DIRECT_MSG_DATA_TRANSFER: {
		struct smb_direct_data_transfer *req =
			(struct smb_direct_data_transfer *)recvmsg->packet;
		struct smb2_hdr *hdr = (struct smb2_hdr *)(recvmsg->packet
				+ le32_to_cpu(req->data_offset) - 4);
		ksmbd_debug(RDMA,
			    "CreditGranted: %u, CreditRequested: %u, DataLength: %u, RemainingDataLength: %u, SMB: %x, Command: %u\n",
			    le16_to_cpu(req->credits_granted),
			    le16_to_cpu(req->credits_requested),
			    req->data_length, req->remaining_data_length,
			    hdr->ProtocolId, hdr->Command);
		break;
	}
	case SMB_DIRECT_MSG_NEGOTIATE_REQ: {
		struct smb_direct_negotiate_req *req =
			(struct smb_direct_negotiate_req *)recvmsg->packet;
		ksmbd_debug(RDMA,
			    "MinVersion: %u, MaxVersion: %u, CreditRequested: %u, MaxSendSize: %u, MaxRecvSize: %u, MaxFragmentedSize: %u\n",
			    le16_to_cpu(req->min_version),
			    le16_to_cpu(req->max_version),
			    le16_to_cpu(req->credits_requested),
			    le32_to_cpu(req->preferred_send_size),
			    le32_to_cpu(req->max_receive_size),
			    le32_to_cpu(req->max_fragmented_size));
		if (le16_to_cpu(req->min_version) > 0x0100 ||
		    le16_to_cpu(req->max_version) < 0x0100)
			return -EOPNOTSUPP;
		if (le16_to_cpu(req->credits_requested) <= 0 ||
		    le32_to_cpu(req->max_receive_size) <= 128 ||
		    le32_to_cpu(req->max_fragmented_size) <=
		    128 * 1024)
			return -ECONNABORTED;

		break;
	}
	default:
		return -EINVAL;
	}
	return 0;
}

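/*
 * Receive completion handler. A data-transfer message carrying payload
 * goes onto the reassembly queue; an empty (credit-only) message is
 * recycled immediately. Either way the peer's credit grants and
 * requests are accounted, and more receive buffers are posted when the
 * remaining receive credits run low.
 */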
static void recv_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smb_direct_recvmsg *recvmsg;
	struct smb_direct_transport *t;

	recvmsg = container_of(wc->wr_cqe, struct smb_direct_recvmsg, cqe);
	t = recvmsg->transport;

	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
		if (wc->status != IB_WC_WR_FLUSH_ERR) {
			pr_err("Recv error. status='%s (%d)' opcode=%d\n",
			       ib_wc_status_msg(wc->status), wc->status,
			       wc->opcode);
			smb_direct_disconnect_rdma_connection(t);
		}
		put_empty_recvmsg(t, recvmsg);
		return;
	}

	ksmbd_debug(RDMA, "Recv completed. status='%s (%d)', opcode=%d\n",
		    ib_wc_status_msg(wc->status), wc->status,
		    wc->opcode);

	ib_dma_sync_single_for_cpu(wc->qp->device, recvmsg->sge.addr,
				   recvmsg->sge.length, DMA_FROM_DEVICE);

	switch (recvmsg->type) {
	case SMB_DIRECT_MSG_NEGOTIATE_REQ:
		t->negotiation_requested = true;
		t->full_packet_received = true;
		wake_up_interruptible(&t->wait_status);
		break;
	case SMB_DIRECT_MSG_DATA_TRANSFER: {
		struct smb_direct_data_transfer *data_transfer =
			(struct smb_direct_data_transfer *)recvmsg->packet;
		int data_length = le32_to_cpu(data_transfer->data_length);
		int avail_recvmsg_count, receive_credits;

		if (data_length) {
			if (t->full_packet_received)
				recvmsg->first_segment = true;

			if (le32_to_cpu(data_transfer->remaining_data_length))
				t->full_packet_received = false;
			else
				t->full_packet_received = true;

			enqueue_reassembly(t, recvmsg, data_length);
			wake_up_interruptible(&t->wait_reassembly_queue);

			spin_lock(&t->receive_credit_lock);
			receive_credits = --(t->recv_credits);
			avail_recvmsg_count = t->count_avail_recvmsg;
			spin_unlock(&t->receive_credit_lock);
		} else {
			put_empty_recvmsg(t, recvmsg);

			spin_lock(&t->receive_credit_lock);
			receive_credits = --(t->recv_credits);
			avail_recvmsg_count = ++(t->count_avail_recvmsg);
			spin_unlock(&t->receive_credit_lock);
		}

		t->recv_credit_target =
				le16_to_cpu(data_transfer->credits_requested);
		atomic_add(le16_to_cpu(data_transfer->credits_granted),
			   &t->send_credits);

		if (le16_to_cpu(data_transfer->flags) &
		    SMB_DIRECT_RESPONSE_REQUESTED)
			queue_work(smb_direct_wq, &t->send_immediate_work);

		if (atomic_read(&t->send_credits) > 0)
			wake_up_interruptible(&t->wait_send_credits);

		if (is_receive_credit_post_required(receive_credits, avail_recvmsg_count))
			mod_delayed_work(smb_direct_wq,
					 &t->post_recv_credits_work, 0);
		break;
	}
	default:
		break;
	}
}

static int smb_direct_post_recv(struct smb_direct_transport *t,
				struct smb_direct_recvmsg *recvmsg)
{
	struct ib_recv_wr wr;
	int ret;

	recvmsg->sge.addr = ib_dma_map_single(t->cm_id->device,
					      recvmsg->packet, t->max_recv_size,
					      DMA_FROM_DEVICE);
	ret = ib_dma_mapping_error(t->cm_id->device, recvmsg->sge.addr);
	if (ret)
		return ret;
	recvmsg->sge.length = t->max_recv_size;
	recvmsg->sge.lkey = t->pd->local_dma_lkey;
	recvmsg->cqe.done = recv_done;

	wr.wr_cqe = &recvmsg->cqe;
	wr.next = NULL;
	wr.sg_list = &recvmsg->sge;
	wr.num_sge = 1;

	ret = ib_post_recv(t->qp, &wr, NULL);
	if (ret) {
		pr_err("Can't post recv: %d\n", ret);
		ib_dma_unmap_single(t->cm_id->device,
				    recvmsg->sge.addr, recvmsg->sge.length,
				    DMA_FROM_DEVICE);
		smb_direct_disconnect_rdma_connection(t);
		return ret;
	}
	return ret;
}

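/*
 * Copy up to @size bytes of received payload into @buf, consuming
 * messages from the reassembly queue. A first-segment read with
 * size == 4 returns a synthesized RFC1002 length so the upper layer
 * can keep its TCP-style framing. Blocks until enough data arrives
 * or the connection drops.
 */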
static int smb_direct_read(struct ksmbd_transport *t, char *buf,
			   unsigned int size)
{
	struct smb_direct_recvmsg *recvmsg;
	struct smb_direct_data_transfer *data_transfer;
	int to_copy, to_read, data_read, offset;
	u32 data_length, remaining_data_length, data_offset;
	int rc;
	struct smb_direct_transport *st = SMB_DIRECT_TRANS(t);

again:
	if (st->status != SMB_DIRECT_CS_CONNECTED) {
		pr_err("disconnected\n");
		return -ENOTCONN;
	}

	/*
	 * No need to hold the reassembly queue lock all the time as we are
	 * the only one reading from the front of the queue. The transport
	 * may add more entries to the back of the queue at the same time.
	 */
	if (st->reassembly_data_length >= size) {
		int queue_length;
		int queue_removed = 0;

		/*
		 * Need to make sure reassembly_data_length is read before
		 * reading reassembly_queue_length and calling
		 * get_first_reassembly. This call is lock free as we never
		 * read entries at the end of the queue, which are updated
		 * in SOFTIRQ context as more data is received.
		 */
		virt_rmb();
		queue_length = st->reassembly_queue_length;
		data_read = 0;
		to_read = size;
		offset = st->first_entry_offset;
		while (data_read < size) {
			recvmsg = get_first_reassembly(st);
			data_transfer = smb_direct_recvmsg_payload(recvmsg);
			data_length = le32_to_cpu(data_transfer->data_length);
			remaining_data_length =
				le32_to_cpu(data_transfer->remaining_data_length);
			data_offset = le32_to_cpu(data_transfer->data_offset);

			/*
			 * The upper layer expects an RFC1002 length at the
			 * beginning of the payload. Return it to indicate
			 * the total length of the packet. This minimizes the
			 * change to the upper-layer packet processing logic
			 * and will eventually be removed when an intermediate
			 * transport layer is added.
			 */
			if (recvmsg->first_segment && size == 4) {
				unsigned int rfc1002_len =
					data_length + remaining_data_length;
				*((__be32 *)buf) = cpu_to_be32(rfc1002_len);
				data_read = 4;
				recvmsg->first_segment = false;
				ksmbd_debug(RDMA,
					    "returning rfc1002 length %d\n",
					    rfc1002_len);
				goto read_rfc1002_done;
			}

			to_copy = min_t(int, data_length - offset, to_read);
			memcpy(buf + data_read, (char *)data_transfer + data_offset + offset,
			       to_copy);

			/* move on to the next buffer? */
			if (to_copy == data_length - offset) {
				queue_length--;
				/*
				 * No need to lock if we are not at the
				 * end of the queue
				 */
				if (queue_length) {
					list_del(&recvmsg->list);
				} else {
					spin_lock_irq(&st->reassembly_queue_lock);
					list_del(&recvmsg->list);
					spin_unlock_irq(&st->reassembly_queue_lock);
				}
				queue_removed++;
				put_recvmsg(st, recvmsg);
				offset = 0;
			} else {
				offset += to_copy;
			}

			to_read -= to_copy;
			data_read += to_copy;
		}

		spin_lock_irq(&st->reassembly_queue_lock);
		st->reassembly_data_length -= data_read;
		st->reassembly_queue_length -= queue_removed;
		spin_unlock_irq(&st->reassembly_queue_lock);

		spin_lock(&st->receive_credit_lock);
		st->count_avail_recvmsg += queue_removed;
		if (is_receive_credit_post_required(st->recv_credits, st->count_avail_recvmsg)) {
			spin_unlock(&st->receive_credit_lock);
			mod_delayed_work(smb_direct_wq,
					 &st->post_recv_credits_work, 0);
		} else {
			spin_unlock(&st->receive_credit_lock);
		}

		st->first_entry_offset = offset;
		ksmbd_debug(RDMA,
			    "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
			    data_read, st->reassembly_data_length,
			    st->first_entry_offset);
read_rfc1002_done:
		return data_read;
	}

	ksmbd_debug(RDMA, "wait_event on more data\n");
	rc = wait_event_interruptible(st->wait_reassembly_queue,
				      st->reassembly_data_length >= size ||
				      st->status != SMB_DIRECT_CS_CONNECTED);
	if (rc)
		return -EINTR;

	goto again;
}

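/*
 * Worker that replenishes receive buffers until the peer's requested
 * credit target is met, preferring fresh buffers over recycled empty
 * ones. Newly posted buffers become credits to grant, announced to
 * the peer via the send-immediate work.
 */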
static void smb_direct_post_recv_credits(struct work_struct *work)
{
	struct smb_direct_transport *t = container_of(work,
		struct smb_direct_transport, post_recv_credits_work.work);
	struct smb_direct_recvmsg *recvmsg;
	int receive_credits, credits = 0;
	int ret;
	int use_free = 1;

	spin_lock(&t->receive_credit_lock);
	receive_credits = t->recv_credits;
	spin_unlock(&t->receive_credit_lock);

	if (receive_credits < t->recv_credit_target) {
		while (true) {
			if (use_free)
				recvmsg = get_free_recvmsg(t);
			else
				recvmsg = get_empty_recvmsg(t);
			if (!recvmsg) {
				if (use_free) {
					use_free = 0;
					continue;
				} else {
					break;
				}
			}

			recvmsg->type = SMB_DIRECT_MSG_DATA_TRANSFER;
			recvmsg->first_segment = false;

			ret = smb_direct_post_recv(t, recvmsg);
			if (ret) {
				pr_err("Can't post recv: %d\n", ret);
				put_recvmsg(t, recvmsg);
				break;
			}
			credits++;
		}
	}

	spin_lock(&t->receive_credit_lock);
	t->recv_credits += credits;
	t->count_avail_recvmsg -= credits;
	spin_unlock(&t->receive_credit_lock);

	spin_lock(&t->lock_new_recv_credits);
	t->new_recv_credits += credits;
	spin_unlock(&t->lock_new_recv_credits);

	if (credits)
		queue_work(smb_direct_wq, &t->send_immediate_work);
}

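/*
 * Send completion handler. Accounts the finished send against the
 * pending counters, then frees every message in the chain that was
 * posted as one linked work request; only the last message of a chain
 * is signaled, so its completion covers all of them (a lone message
 * simply frees itself).
 */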
static void send_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct smb_direct_sendmsg *sendmsg, *sibling;
	struct smb_direct_transport *t;
	struct list_head *pos, *prev, *end;

	sendmsg = container_of(wc->wr_cqe, struct smb_direct_sendmsg, cqe);
	t = sendmsg->transport;

	ksmbd_debug(RDMA, "Send completed. status='%s (%d)', opcode=%d\n",
		    ib_wc_status_msg(wc->status), wc->status,
		    wc->opcode);

	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
		pr_err("Send error. status='%s (%d)', opcode=%d\n",
		       ib_wc_status_msg(wc->status), wc->status,
		       wc->opcode);
		smb_direct_disconnect_rdma_connection(t);
	}

	if (sendmsg->num_sge > 1) {
		if (atomic_dec_and_test(&t->send_payload_pending))
			wake_up(&t->wait_send_payload_pending);
	} else {
		if (atomic_dec_and_test(&t->send_pending))
			wake_up(&t->wait_send_pending);
	}

	/*
	 * Iterate and free the list of messages in reverse. The list's head
	 * is invalid.
	 */
	for (pos = &sendmsg->list, prev = pos->prev, end = sendmsg->list.next;
	     prev != end; pos = prev, prev = prev->prev) {
		sibling = container_of(pos, struct smb_direct_sendmsg, list);
		smb_direct_free_sendmsg(t, sibling);
	}

	sibling = container_of(pos, struct smb_direct_sendmsg, list);
	smb_direct_free_sendmsg(t, sibling);
}

static int manage_credits_prior_sending(struct smb_direct_transport *t)
{
	int new_credits;

	spin_lock(&t->lock_new_recv_credits);
	new_credits = t->new_recv_credits;
	t->new_recv_credits = 0;
	spin_unlock(&t->lock_new_recv_credits);

	return new_credits;
}

static int smb_direct_post_send(struct smb_direct_transport *t,
				struct ib_send_wr *wr)
{
	int ret;

	if (wr->num_sge > 1)
		atomic_inc(&t->send_payload_pending);
	else
		atomic_inc(&t->send_pending);

	ret = ib_post_send(t->qp, wr, NULL);
	if (ret) {
		pr_err("failed to post send: %d\n", ret);
		if (wr->num_sge > 1) {
			if (atomic_dec_and_test(&t->send_payload_pending))
				wake_up(&t->wait_send_payload_pending);
		} else {
			if (atomic_dec_and_test(&t->send_pending))
				wake_up(&t->wait_send_pending);
		}
		smb_direct_disconnect_rdma_connection(t);
	}
	return ret;
}

static void smb_direct_send_ctx_init(struct smb_direct_transport *t,
				     struct smb_direct_send_ctx *send_ctx,
				     bool need_invalidate_rkey,
				     unsigned int remote_key)
{
	INIT_LIST_HEAD(&send_ctx->msg_list);
	send_ctx->wr_cnt = 0;
	send_ctx->need_invalidate_rkey = need_invalidate_rkey;
	send_ctx->remote_key = remote_key;
}

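/*
 * Post the accumulated send list as a single chained work request.
 * Only the final message is signaled for completion; when this is the
 * last flush and the peer asked for it, that message also carries the
 * remote key invalidation.
 */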
static int smb_direct_flush_send_list(struct smb_direct_transport *t,
				      struct smb_direct_send_ctx *send_ctx,
				      bool is_last)
{
	struct smb_direct_sendmsg *first, *last;
	int ret;

	if (list_empty(&send_ctx->msg_list))
		return 0;

	first = list_first_entry(&send_ctx->msg_list,
				 struct smb_direct_sendmsg,
				 list);
	last = list_last_entry(&send_ctx->msg_list,
			       struct smb_direct_sendmsg,
			       list);

	last->wr.send_flags = IB_SEND_SIGNALED;
	last->wr.wr_cqe = &last->cqe;
	if (is_last && send_ctx->need_invalidate_rkey) {
		last->wr.opcode = IB_WR_SEND_WITH_INV;
		last->wr.ex.invalidate_rkey = send_ctx->remote_key;
	}

	ret = smb_direct_post_send(t, &first->wr);
	if (!ret) {
		smb_direct_send_ctx_init(t, send_ctx,
					 send_ctx->need_invalidate_rkey,
					 send_ctx->remote_key);
	} else {
		atomic_add(send_ctx->wr_cnt, &t->send_credits);
		wake_up(&t->wait_send_credits);
		list_for_each_entry_safe(first, last, &send_ctx->msg_list,
					 list) {
			smb_direct_free_sendmsg(t, first);
		}
	}
	return ret;
}

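/*
 * Atomically take one credit from @credits, sleeping on @waitq until
 * a credit becomes available or the connection is no longer
 * established.
 */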
static int wait_for_credits(struct smb_direct_transport *t,
			    wait_queue_head_t *waitq, atomic_t *credits)
{
	int ret;

	do {
		if (atomic_dec_return(credits) >= 0)
			return 0;

		atomic_inc(credits);
		ret = wait_event_interruptible(*waitq,
					       atomic_read(credits) > 0 ||
					       t->status != SMB_DIRECT_CS_CONNECTED);

		if (t->status != SMB_DIRECT_CS_CONNECTED)
			return -ENOTCONN;
		else if (ret < 0)
			return ret;
	} while (true);
}

static int wait_for_send_credits(struct smb_direct_transport *t,
				 struct smb_direct_send_ctx *send_ctx)
{
	int ret;

	if (send_ctx &&
	    (send_ctx->wr_cnt >= 16 || atomic_read(&t->send_credits) <= 1)) {
		ret = smb_direct_flush_send_list(t, send_ctx, false);
		if (ret)
			return ret;
	}

	return wait_for_credits(t, &t->wait_send_credits, &t->send_credits);
}

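/*
 * Allocate a send message and fill in the SMB_DIRECT data-transfer
 * header (credits, data offset/length, remaining length), then DMA-map
 * it as the first SGE. Padding is dropped when there is no payload.
 */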
static int smb_direct_create_header(struct smb_direct_transport *t,
				    int size, int remaining_data_length,
				    struct smb_direct_sendmsg **sendmsg_out)
{
	struct smb_direct_sendmsg *sendmsg;
	struct smb_direct_data_transfer *packet;
	int header_length;
	int ret;

	sendmsg = smb_direct_alloc_sendmsg(t);
	if (IS_ERR(sendmsg))
		return PTR_ERR(sendmsg);

	/* Fill in the packet header */
	packet = (struct smb_direct_data_transfer *)sendmsg->packet;
	packet->credits_requested = cpu_to_le16(t->send_credit_target);
	packet->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));

	packet->flags = 0;
	packet->reserved = 0;
	if (!size)
		packet->data_offset = 0;
	else
		packet->data_offset = cpu_to_le32(24);
	packet->data_length = cpu_to_le32(size);
	packet->remaining_data_length = cpu_to_le32(remaining_data_length);
	packet->padding = 0;

	ksmbd_debug(RDMA,
		    "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
		    le16_to_cpu(packet->credits_requested),
		    le16_to_cpu(packet->credits_granted),
		    le32_to_cpu(packet->data_offset),
		    le32_to_cpu(packet->data_length),
		    le32_to_cpu(packet->remaining_data_length));

	/* Map the packet to DMA */
	header_length = sizeof(struct smb_direct_data_transfer);
	/* If this is a packet without payload, don't send padding */
	if (!size)
		header_length =
			offsetof(struct smb_direct_data_transfer, padding);

	sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
						 (void *)packet,
						 header_length,
						 DMA_TO_DEVICE);
	ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
	if (ret) {
		smb_direct_free_sendmsg(t, sendmsg);
		return ret;
	}

	sendmsg->num_sge = 1;
	sendmsg->sge[0].length = header_length;
	sendmsg->sge[0].lkey = t->pd->local_dma_lkey;

	*sendmsg_out = sendmsg;
	return 0;
}

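/*
 * Build a scatterlist covering the buffer, one entry per page, using
 * vmalloc_to_page() or kmap_to_page() as appropriate. Returns the
 * number of entries used, or -EINVAL if @nentries cannot hold them.
 */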
static int get_sg_list(void *buf, int size, struct scatterlist *sg_list, int nentries)
{
	bool high = is_vmalloc_addr(buf);
	struct page *page;
	int offset, len;
	int i = 0;

	if (nentries < get_buf_page_count(buf, size))
		return -EINVAL;

	offset = offset_in_page(buf);
	buf -= offset;
	while (size > 0) {
		len = min_t(int, PAGE_SIZE - offset, size);
		if (high)
			page = vmalloc_to_page(buf);
		else
			page = kmap_to_page(buf);

		if (!sg_list)
			return -EINVAL;
		sg_set_page(sg_list, page, len, offset);
		sg_list = sg_next(sg_list);

		buf += PAGE_SIZE;
		size -= len;
		offset = 0;
		i++;
	}
	return i;
}

static int get_mapped_sg_list(struct ib_device *device, void *buf, int size,
			      struct scatterlist *sg_list, int nentries,
			      enum dma_data_direction dir)
{
	int npages;

	npages = get_sg_list(buf, size, sg_list, nentries);
	if (npages <= 0)
		return -EINVAL;
	return ib_dma_map_sg(device, sg_list, npages, dir);
}

static int post_sendmsg(struct smb_direct_transport *t,
			struct smb_direct_send_ctx *send_ctx,
			struct smb_direct_sendmsg *msg)
{
	int i;

	for (i = 0; i < msg->num_sge; i++)
		ib_dma_sync_single_for_device(t->cm_id->device,
					      msg->sge[i].addr, msg->sge[i].length,
					      DMA_TO_DEVICE);

	msg->cqe.done = send_done;
	msg->wr.opcode = IB_WR_SEND;
	msg->wr.sg_list = &msg->sge[0];
	msg->wr.num_sge = msg->num_sge;
	msg->wr.next = NULL;

	if (send_ctx) {
		msg->wr.wr_cqe = NULL;
		msg->wr.send_flags = 0;
		if (!list_empty(&send_ctx->msg_list)) {
			struct smb_direct_sendmsg *last;

			last = list_last_entry(&send_ctx->msg_list,
					       struct smb_direct_sendmsg,
					       list);
			last->wr.next = &msg->wr;
		}
		list_add_tail(&msg->list, &send_ctx->msg_list);
		send_ctx->wr_cnt++;
		return 0;
	}

	msg->wr.wr_cqe = &msg->cqe;
	msg->wr.send_flags = IB_SEND_SIGNALED;
	return smb_direct_post_send(t, &msg->wr);
}

static int smb_direct_post_send_data(struct smb_direct_transport *t,
				     struct smb_direct_send_ctx *send_ctx,
				     struct kvec *iov, int niov,
				     int remaining_data_length)
{
	int i, j, ret;
	struct smb_direct_sendmsg *msg;
	int data_length;
	struct scatterlist sg[SMB_DIRECT_MAX_SEND_SGES - 1];

	ret = wait_for_send_credits(t, send_ctx);
	if (ret)
		return ret;

	data_length = 0;
	for (i = 0; i < niov; i++)
		data_length += iov[i].iov_len;

	ret = smb_direct_create_header(t, data_length, remaining_data_length,
				       &msg);
	if (ret) {
		atomic_inc(&t->send_credits);
		return ret;
	}

	for (i = 0; i < niov; i++) {
		struct ib_sge *sge;
		int sg_cnt;

		sg_init_table(sg, SMB_DIRECT_MAX_SEND_SGES - 1);
		sg_cnt = get_mapped_sg_list(t->cm_id->device,
					    iov[i].iov_base, iov[i].iov_len,
					    sg, SMB_DIRECT_MAX_SEND_SGES - 1,
					    DMA_TO_DEVICE);
		if (sg_cnt <= 0) {
			pr_err("failed to map buffer\n");
			ret = -ENOMEM;
			goto err;
		} else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES - 1) {
			pr_err("buffer not fitted into sges\n");
			ret = -E2BIG;
			ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
					DMA_TO_DEVICE);
			goto err;
		}

		for (j = 0; j < sg_cnt; j++) {
			sge = &msg->sge[msg->num_sge];
			sge->addr = sg_dma_address(&sg[j]);
			sge->length = sg_dma_len(&sg[j]);
			sge->lkey = t->pd->local_dma_lkey;
			msg->num_sge++;
		}
	}

	ret = post_sendmsg(t, send_ctx, msg);
	if (ret)
		goto err;
	return 0;
err:
	smb_direct_free_sendmsg(t, msg);
	atomic_inc(&t->send_credits);
	return ret;
}

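/*
 * Send an iovec as a sequence of SMB_DIRECT data-transfer messages.
 * Consecutive kvecs are packed per message up to the maximum payload
 * size; an oversized kvec is split across messages. The RFC1002
 * header produced by the upper layer is stripped before sending.
 */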
static int smb_direct_writev(struct ksmbd_transport *t,
			     struct kvec *iov, int niovs, int buflen,
			     bool need_invalidate, unsigned int remote_key)
{
	struct smb_direct_transport *st = SMB_DIRECT_TRANS(t);
	int remaining_data_length;
	int start, i, j;
	int max_iov_size = st->max_send_size -
			sizeof(struct smb_direct_data_transfer);
	int ret;
	struct kvec vec;
	struct smb_direct_send_ctx send_ctx;

	if (st->status != SMB_DIRECT_CS_CONNECTED) {
		ret = -ENOTCONN;
		goto done;
	}

	//FIXME: skip RFC1002 header..
	buflen -= 4;
	iov[0].iov_base += 4;
	iov[0].iov_len -= 4;

	remaining_data_length = buflen;
	ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);

	smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
	start = i = 0;
	buflen = 0;
	while (true) {
		buflen += iov[i].iov_len;
		if (buflen > max_iov_size) {
			if (i > start) {
				remaining_data_length -=
					(buflen - iov[i].iov_len);
				ret = smb_direct_post_send_data(st, &send_ctx,
								&iov[start], i - start,
								remaining_data_length);
				if (ret)
					goto done;
			} else {
				/* iov[start] is too big, break it */
				int nvec = (buflen + max_iov_size - 1) /
						max_iov_size;

				for (j = 0; j < nvec; j++) {
					vec.iov_base =
						(char *)iov[start].iov_base +
						j * max_iov_size;
					vec.iov_len =
						min_t(int, max_iov_size,
						      buflen - max_iov_size * j);
					remaining_data_length -= vec.iov_len;
					ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
									remaining_data_length);
					if (ret)
						goto done;
				}
				i++;
				if (i == niovs)
					break;
			}
			start = i;
			buflen = 0;
		} else {
			i++;
			if (i == niovs) {
				/* send out all remaining vecs */
				remaining_data_length -= buflen;
				ret = smb_direct_post_send_data(st, &send_ctx,
								&iov[start], i - start,
								remaining_data_length);
				if (ret)
					goto done;
				break;
			}
		}
	}

done:
	ret = smb_direct_flush_send_list(st, &send_ctx, true);

	/*
	 * As an optimization, we don't wait for individual I/O to finish
	 * before sending the next one. Send them all and wait for the
	 * pending send count to reach 0, which means all the I/Os have
	 * been sent out and we are good to return.
	 */

	wait_event(st->wait_send_payload_pending,
		   atomic_read(&st->send_payload_pending) == 0);
	return ret;
}

static void read_write_done(struct ib_cq *cq, struct ib_wc *wc,
			    enum dma_data_direction dir)
{
	struct smb_direct_rdma_rw_msg *msg = container_of(wc->wr_cqe,
			struct smb_direct_rdma_rw_msg, cqe);
	struct smb_direct_transport *t = msg->t;

	if (wc->status != IB_WC_SUCCESS) {
		pr_err("read/write error. opcode = %d, status = %s(%d)\n",
		       wc->opcode, ib_wc_status_msg(wc->status), wc->status);
		smb_direct_disconnect_rdma_connection(t);
	}

	if (atomic_inc_return(&t->rw_avail_ops) > 0)
		wake_up(&t->wait_rw_avail_ops);

	rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
			    msg->sg_list, msg->sgt.nents, dir);
	sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
	complete(msg->completion);
	kfree(msg);
}

static void read_done(struct ib_cq *cq, struct ib_wc *wc)
{
	read_write_done(cq, wc, DMA_FROM_DEVICE);
}

static void write_done(struct ib_cq *cq, struct ib_wc *wc)
{
	read_write_done(cq, wc, DMA_TO_DEVICE);
}

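/*
 * Perform one RDMA read or write against the peer's advertised memory
 * region: build a scatterlist for the local buffer, set up an
 * rdma_rw_ctx against @remote_key/@remote_offset, post the work
 * requests, and block until the completion handler fires.
 */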
static int smb_direct_rdma_xmit(struct smb_direct_transport *t, void *buf,
				int buf_len, u32 remote_key, u64 remote_offset,
				u32 remote_len, bool is_read)
{
	struct smb_direct_rdma_rw_msg *msg;
	int ret;
	DECLARE_COMPLETION_ONSTACK(completion);
	struct ib_send_wr *first_wr = NULL;

	ret = wait_for_credits(t, &t->wait_rw_avail_ops, &t->rw_avail_ops);
	if (ret < 0)
		return ret;

	/* TODO: mempool */
	msg = kmalloc(offsetof(struct smb_direct_rdma_rw_msg, sg_list) +
		      sizeof(struct scatterlist) * SG_CHUNK_SIZE, GFP_KERNEL);
	if (!msg) {
		atomic_inc(&t->rw_avail_ops);
		return -ENOMEM;
	}

	msg->sgt.sgl = &msg->sg_list[0];
	ret = sg_alloc_table_chained(&msg->sgt,
				     get_buf_page_count(buf, buf_len),
				     msg->sg_list, SG_CHUNK_SIZE);
	if (ret) {
		atomic_inc(&t->rw_avail_ops);
		kfree(msg);
		return -ENOMEM;
	}

	ret = get_sg_list(buf, buf_len, msg->sgt.sgl, msg->sgt.orig_nents);
	if (ret <= 0) {
		pr_err("failed to get pages\n");
		goto err;
	}

	ret = rdma_rw_ctx_init(&msg->rw_ctx, t->qp, t->qp->port,
			       msg->sg_list, get_buf_page_count(buf, buf_len),
			       0, remote_offset, remote_key,
			       is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	if (ret < 0) {
		pr_err("failed to init rdma_rw_ctx: %d\n", ret);
		goto err;
	}

	msg->t = t;
	msg->cqe.done = is_read ? read_done : write_done;
	msg->completion = &completion;
	first_wr = rdma_rw_ctx_wrs(&msg->rw_ctx, t->qp, t->qp->port,
				   &msg->cqe, NULL);

	ret = ib_post_send(t->qp, first_wr, NULL);
	if (ret) {
		pr_err("failed to post send wr: %d\n", ret);
		goto err;
	}

	wait_for_completion(&completion);
	return 0;

err:
	atomic_inc(&t->rw_avail_ops);
	if (first_wr)
		rdma_rw_ctx_destroy(&msg->rw_ctx, t->qp, t->qp->port,
				    msg->sg_list, msg->sgt.nents,
				    is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
	sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
	kfree(msg);
	return ret;
}

static int smb_direct_rdma_write(struct ksmbd_transport *t, void *buf,
				 unsigned int buflen, u32 remote_key,
				 u64 remote_offset, u32 remote_len)
{
	return smb_direct_rdma_xmit(SMB_DIRECT_TRANS(t), buf, buflen,
				    remote_key, remote_offset,
				    remote_len, false);
}

static int smb_direct_rdma_read(struct ksmbd_transport *t, void *buf,
				unsigned int buflen, u32 remote_key,
				u64 remote_offset, u32 remote_len)
{
	return smb_direct_rdma_xmit(SMB_DIRECT_TRANS(t), buf, buflen,
				    remote_key, remote_offset,
				    remote_len, true);
}

static void smb_direct_disconnect(struct ksmbd_transport *t)
{
	struct smb_direct_transport *st = SMB_DIRECT_TRANS(t);

	ksmbd_debug(RDMA, "Disconnecting cm_id=%p\n", st->cm_id);

	smb_direct_disconnect_rdma_connection(st);
	wait_event_interruptible(st->wait_status,
				 st->status == SMB_DIRECT_CS_DISCONNECTED);
	free_transport(st);
}

static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
				 struct rdma_cm_event *event)
{
	struct smb_direct_transport *t = cm_id->context;

	ksmbd_debug(RDMA, "RDMA CM event. cm_id=%p event=%s (%d)\n",
		    cm_id, rdma_event_msg(event->event), event->event);

	switch (event->event) {
	case RDMA_CM_EVENT_ESTABLISHED: {
		t->status = SMB_DIRECT_CS_CONNECTED;
		wake_up_interruptible(&t->wait_status);
		break;
	}
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_DISCONNECTED: {
		t->status = SMB_DIRECT_CS_DISCONNECTED;
		wake_up_interruptible(&t->wait_status);
		wake_up_interruptible(&t->wait_reassembly_queue);
		wake_up(&t->wait_send_credits);
		break;
	}
	case RDMA_CM_EVENT_CONNECT_ERROR: {
		t->status = SMB_DIRECT_CS_DISCONNECTED;
		wake_up_interruptible(&t->wait_status);
		break;
	}
	default:
		pr_err("Unexpected RDMA CM event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event),
		       event->event);
		break;
	}
	return 0;
}

static void smb_direct_qpair_handler(struct ib_event *event, void *context)
{
	struct smb_direct_transport *t = context;

	ksmbd_debug(RDMA, "Received QP event. cm_id=%p, event=%s (%d)\n",
		    t->cm_id, ib_event_msg(event->event), event->event);

	switch (event->event) {
	case IB_EVENT_CQ_ERR:
	case IB_EVENT_QP_FATAL:
		smb_direct_disconnect_rdma_connection(t);
		break;
	default:
		break;
	}
}

static int smb_direct_send_negotiate_response(struct smb_direct_transport *t,
					      int failed)
{
	struct smb_direct_sendmsg *sendmsg;
	struct smb_direct_negotiate_resp *resp;
	int ret;

	sendmsg = smb_direct_alloc_sendmsg(t);
	if (IS_ERR(sendmsg))
		return -ENOMEM;

	resp = (struct smb_direct_negotiate_resp *)sendmsg->packet;
	if (failed) {
		memset(resp, 0, sizeof(*resp));
		resp->min_version = cpu_to_le16(0x0100);
		resp->max_version = cpu_to_le16(0x0100);
		resp->status = STATUS_NOT_SUPPORTED;
	} else {
		resp->status = STATUS_SUCCESS;
		resp->min_version = SMB_DIRECT_VERSION_LE;
		resp->max_version = SMB_DIRECT_VERSION_LE;
		resp->negotiated_version = SMB_DIRECT_VERSION_LE;
		resp->reserved = 0;
		resp->credits_requested =
				cpu_to_le16(t->send_credit_target);
		resp->credits_granted = cpu_to_le16(manage_credits_prior_sending(t));
		resp->max_readwrite_size = cpu_to_le32(t->max_rdma_rw_size);
		resp->preferred_send_size = cpu_to_le32(t->max_send_size);
		resp->max_receive_size = cpu_to_le32(t->max_recv_size);
		resp->max_fragmented_size =
				cpu_to_le32(t->max_fragmented_recv_size);
	}

	sendmsg->sge[0].addr = ib_dma_map_single(t->cm_id->device,
						 (void *)resp, sizeof(*resp),
						 DMA_TO_DEVICE);
	ret = ib_dma_mapping_error(t->cm_id->device, sendmsg->sge[0].addr);
	if (ret) {
		smb_direct_free_sendmsg(t, sendmsg);
		return ret;
	}

	sendmsg->num_sge = 1;
	sendmsg->sge[0].length = sizeof(*resp);
	sendmsg->sge[0].lkey = t->pd->local_dma_lkey;

	ret = post_sendmsg(t, NULL, sendmsg);
	if (ret) {
		smb_direct_free_sendmsg(t, sendmsg);
		return ret;
	}

	wait_event(t->wait_send_pending,
		   atomic_read(&t->send_pending) == 0);
	return 0;
}

static int smb_direct_accept_client(struct smb_direct_transport *t)
{
	struct rdma_conn_param conn_param;
	struct ib_port_immutable port_immutable;
	u32 ird_ord_hdr[2];
	int ret;

	memset(&conn_param, 0, sizeof(conn_param));
	conn_param.initiator_depth = min_t(u8, t->cm_id->device->attrs.max_qp_rd_atom,
					   SMB_DIRECT_CM_INITIATOR_DEPTH);
	conn_param.responder_resources = 0;

	t->cm_id->device->ops.get_port_immutable(t->cm_id->device,
						 t->cm_id->port_num,
						 &port_immutable);
	if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
		ird_ord_hdr[0] = conn_param.responder_resources;
		ird_ord_hdr[1] = 1;
		conn_param.private_data = ird_ord_hdr;
		conn_param.private_data_len = sizeof(ird_ord_hdr);
	} else {
		conn_param.private_data = NULL;
		conn_param.private_data_len = 0;
	}
	conn_param.retry_count = SMB_DIRECT_CM_RETRY;
	conn_param.rnr_retry_count = SMB_DIRECT_CM_RNR_RETRY;
	conn_param.flow_control = 0;

	ret = rdma_accept(t->cm_id, &conn_param);
	if (ret) {
		pr_err("error at rdma_accept: %d\n", ret);
		return ret;
	}

	wait_event_interruptible(t->wait_status,
				 t->status != SMB_DIRECT_CS_NEW);
	if (t->status != SMB_DIRECT_CS_CONNECTED)
		return -ENOTCONN;
	return 0;
}

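/*
 * Run the SMB_DIRECT negotiation: post a receive for the negotiate
 * request, accept the RDMA connection, wait (with a timeout) for the
 * request to arrive, adopt the client's advertised sizes, and send
 * back a negotiate response.
 */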
static int smb_direct_negotiate(struct smb_direct_transport *t)
{
	int ret;
	struct smb_direct_recvmsg *recvmsg;
	struct smb_direct_negotiate_req *req;

	recvmsg = get_free_recvmsg(t);
	if (!recvmsg)
		return -ENOMEM;
	recvmsg->type = SMB_DIRECT_MSG_NEGOTIATE_REQ;

	ret = smb_direct_post_recv(t, recvmsg);
	if (ret) {
		pr_err("Can't post recv: %d\n", ret);
		goto out;
	}

	t->negotiation_requested = false;
	ret = smb_direct_accept_client(t);
	if (ret) {
		pr_err("Can't accept client\n");
		goto out;
	}

	smb_direct_post_recv_credits(&t->post_recv_credits_work.work);

	ksmbd_debug(RDMA, "Waiting for SMB_DIRECT negotiate request\n");
	ret = wait_event_interruptible_timeout(t->wait_status,
					       t->negotiation_requested ||
					       t->status == SMB_DIRECT_CS_DISCONNECTED,
					       SMB_DIRECT_NEGOTIATE_TIMEOUT * HZ);
	if (ret <= 0 || t->status == SMB_DIRECT_CS_DISCONNECTED) {
		ret = ret < 0 ? ret : -ETIMEDOUT;
		goto out;
	}

	ret = smb_direct_check_recvmsg(recvmsg);
	if (ret == -ECONNABORTED)
		goto out;

	req = (struct smb_direct_negotiate_req *)recvmsg->packet;
	t->max_recv_size = min_t(int, t->max_recv_size,
				 le32_to_cpu(req->preferred_send_size));
	t->max_send_size = min_t(int, t->max_send_size,
				 le32_to_cpu(req->max_receive_size));
	t->max_fragmented_send_size =
			le32_to_cpu(req->max_fragmented_size);

	ret = smb_direct_send_negotiate_response(t, ret);
out:
	if (recvmsg)
		put_recvmsg(t, recvmsg);
	return ret;
}

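/*
 * Derive QP capabilities from the module parameters and verify that
 * the device can support them: enough SGEs per work request, and
 * enough CQ entries and QP WRs for the configured credit and RDMA
 * read/write limits.
 */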
static int smb_direct_init_params(struct smb_direct_transport *t,
				  struct ib_qp_cap *cap)
{
	struct ib_device *device = t->cm_id->device;
	int max_send_sges, max_pages, max_rw_wrs, max_send_wrs;

	/*
	 * Need two more SGEs because an SMB_DIRECT header will be mapped,
	 * and a send buffer may not be page aligned.
	 */
	t->max_send_size = smb_direct_max_send_size;
	max_send_sges = DIV_ROUND_UP(t->max_send_size, PAGE_SIZE) + 2;
	if (max_send_sges > SMB_DIRECT_MAX_SEND_SGES) {
		pr_err("max_send_size %d is too large\n", t->max_send_size);
		return -EINVAL;
	}

	/*
	 * Allow smb_direct_max_outstanding_rw_ops of in-flight RDMA
	 * read/writes. The HCA guarantees at least max_send_sge SGEs for
	 * an RDMA read/write work request, and if memory registration is
	 * used, we need reg_mr and local_inv WRs for each read/write.
	 */
	t->max_rdma_rw_size = smb_direct_max_read_write_size;
	max_pages = DIV_ROUND_UP(t->max_rdma_rw_size, PAGE_SIZE) + 1;
	max_rw_wrs = DIV_ROUND_UP(max_pages, SMB_DIRECT_MAX_SEND_SGES);
	max_rw_wrs += rdma_rw_mr_factor(device, t->cm_id->port_num,
			max_pages) * 2;
	max_rw_wrs *= smb_direct_max_outstanding_rw_ops;

	max_send_wrs = smb_direct_send_credit_target + max_rw_wrs;
	if (max_send_wrs > device->attrs.max_cqe ||
	    max_send_wrs > device->attrs.max_qp_wr) {
		pr_err("consider lowering send_credit_target = %d, or max_outstanding_rw_ops = %d\n",
		       smb_direct_send_credit_target,
		       smb_direct_max_outstanding_rw_ops);
		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
		       device->attrs.max_cqe, device->attrs.max_qp_wr);
		return -EINVAL;
	}

	if (smb_direct_receive_credit_max > device->attrs.max_cqe ||
	    smb_direct_receive_credit_max > device->attrs.max_qp_wr) {
		pr_err("consider lowering receive_credit_max = %d\n",
		       smb_direct_receive_credit_max);
		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
		       device->attrs.max_cqe, device->attrs.max_qp_wr);
		return -EINVAL;
	}

	if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
		pr_err("warning: device max_send_sge = %d too small\n",
		       device->attrs.max_send_sge);
		return -EINVAL;
	}
	if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
		pr_err("warning: device max_recv_sge = %d too small\n",
		       device->attrs.max_recv_sge);
		return -EINVAL;
	}

	t->recv_credits = 0;
	t->count_avail_recvmsg = 0;

	t->recv_credit_max = smb_direct_receive_credit_max;
	t->recv_credit_target = 10;
	t->new_recv_credits = 0;

	t->send_credit_target = smb_direct_send_credit_target;
	atomic_set(&t->send_credits, 0);
	atomic_set(&t->rw_avail_ops, smb_direct_max_outstanding_rw_ops);

	t->max_send_size = smb_direct_max_send_size;
	t->max_recv_size = smb_direct_max_receive_size;
	t->max_fragmented_recv_size = smb_direct_max_fragmented_recv_size;

	cap->max_send_wr = max_send_wrs;
	cap->max_recv_wr = t->recv_credit_max;
	cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
	cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
	cap->max_inline_data = 0;
	cap->max_rdma_ctxs = 0;
	return 0;
}

static void smb_direct_destroy_pools(struct smb_direct_transport *t)
{
	struct smb_direct_recvmsg *recvmsg;

	while ((recvmsg = get_free_recvmsg(t)))
		mempool_free(recvmsg, t->recvmsg_mempool);
	while ((recvmsg = get_empty_recvmsg(t)))
		mempool_free(recvmsg, t->recvmsg_mempool);

	mempool_destroy(t->recvmsg_mempool);
	t->recvmsg_mempool = NULL;

	kmem_cache_destroy(t->recvmsg_cache);
	t->recvmsg_cache = NULL;

	mempool_destroy(t->sendmsg_mempool);
	t->sendmsg_mempool = NULL;

	kmem_cache_destroy(t->sendmsg_cache);
	t->sendmsg_cache = NULL;
}

static int smb_direct_create_pools(struct smb_direct_transport *t)
{
	char name[80];
	int i;
	struct smb_direct_recvmsg *recvmsg;

	snprintf(name, sizeof(name), "smb_direct_rqst_pool_%p", t);
	t->sendmsg_cache = kmem_cache_create(name,
					     sizeof(struct smb_direct_sendmsg) +
					      sizeof(struct smb_direct_negotiate_resp),
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!t->sendmsg_cache)
		return -ENOMEM;

	t->sendmsg_mempool = mempool_create(t->send_credit_target,
					    mempool_alloc_slab, mempool_free_slab,
					    t->sendmsg_cache);
	if (!t->sendmsg_mempool)
		goto err;

	snprintf(name, sizeof(name), "smb_direct_resp_%p", t);
	t->recvmsg_cache = kmem_cache_create(name,
					     sizeof(struct smb_direct_recvmsg) +
					      t->max_recv_size,
					     0, SLAB_HWCACHE_ALIGN, NULL);
	if (!t->recvmsg_cache)
		goto err;

	t->recvmsg_mempool =
		mempool_create(t->recv_credit_max, mempool_alloc_slab,
			       mempool_free_slab, t->recvmsg_cache);
	if (!t->recvmsg_mempool)
		goto err;

	INIT_LIST_HEAD(&t->recvmsg_queue);

	for (i = 0; i < t->recv_credit_max; i++) {
		recvmsg = mempool_alloc(t->recvmsg_mempool, GFP_KERNEL);
		if (!recvmsg)
			goto err;
		recvmsg->transport = t;
		list_add(&recvmsg->list, &t->recvmsg_queue);
	}
	t->count_avail_recvmsg = t->recv_credit_max;

	return 0;
err:
	smb_direct_destroy_pools(t);
	return -ENOMEM;
}

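/*
 * Allocate the PD, the send and receive completion queues, and the RC
 * queue pair for this connection, sized from the previously computed
 * QP capabilities.
 */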
static int smb_direct_create_qpair(struct smb_direct_transport *t,
				   struct ib_qp_cap *cap)
{
	int ret;
	struct ib_qp_init_attr qp_attr;

	t->pd = ib_alloc_pd(t->cm_id->device, 0);
	if (IS_ERR(t->pd)) {
		pr_err("Can't create RDMA PD\n");
		ret = PTR_ERR(t->pd);
		t->pd = NULL;
		return ret;
	}

	t->send_cq = ib_alloc_cq(t->cm_id->device, t,
				 t->send_credit_target, 0, IB_POLL_WORKQUEUE);
	if (IS_ERR(t->send_cq)) {
		pr_err("Can't create RDMA send CQ\n");
		ret = PTR_ERR(t->send_cq);
		t->send_cq = NULL;
		goto err;
	}

	t->recv_cq = ib_alloc_cq(t->cm_id->device, t,
				 cap->max_send_wr + cap->max_rdma_ctxs,
				 0, IB_POLL_WORKQUEUE);
	if (IS_ERR(t->recv_cq)) {
		pr_err("Can't create RDMA recv CQ\n");
		ret = PTR_ERR(t->recv_cq);
		t->recv_cq = NULL;
		goto err;
	}

	memset(&qp_attr, 0, sizeof(qp_attr));
	qp_attr.event_handler = smb_direct_qpair_handler;
	qp_attr.qp_context = t;
	qp_attr.cap = *cap;
	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	qp_attr.qp_type = IB_QPT_RC;
	qp_attr.send_cq = t->send_cq;
	qp_attr.recv_cq = t->recv_cq;
	qp_attr.port_num = ~0;

	ret = rdma_create_qp(t->cm_id, t->pd, &qp_attr);
	if (ret) {
		pr_err("Can't create RDMA QP: %d\n", ret);
		goto err;
	}

	t->qp = t->cm_id->qp;
	t->cm_id->event_handler = smb_direct_cm_handler;

	return 0;
err:
	if (t->qp) {
		ib_destroy_qp(t->qp);
		t->qp = NULL;
	}
	if (t->recv_cq) {
		ib_destroy_cq(t->recv_cq);
		t->recv_cq = NULL;
	}
	if (t->send_cq) {
		ib_destroy_cq(t->send_cq);
		t->send_cq = NULL;
	}
	if (t->pd) {
		ib_dealloc_pd(t->pd);
		t->pd = NULL;
	}
	return ret;
}

static int smb_direct_prepare(struct ksmbd_transport *t)
{
	struct smb_direct_transport *st = SMB_DIRECT_TRANS(t);
	int ret;
	struct ib_qp_cap qp_cap;

	ret = smb_direct_init_params(st, &qp_cap);
	if (ret) {
		pr_err("Can't configure RDMA parameters\n");
		return ret;
	}

	ret = smb_direct_create_pools(st);
	if (ret) {
		pr_err("Can't init RDMA pool: %d\n", ret);
		return ret;
	}

	ret = smb_direct_create_qpair(st, &qp_cap);
	if (ret) {
		pr_err("Can't accept RDMA client: %d\n", ret);
		return ret;
	}

	ret = smb_direct_negotiate(st);
	if (ret) {
		pr_err("Can't negotiate: %d\n", ret);
		return ret;
	}

	st->status = SMB_DIRECT_CS_CONNECTED;
	return 0;
}

static bool rdma_frwr_is_supported(struct ib_device_attr *attrs)
{
	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
		return false;
	if (attrs->max_fast_reg_page_list_len == 0)
		return false;
	return true;
}

static int smb_direct_handle_connect_request(struct rdma_cm_id *new_cm_id)
{
	struct smb_direct_transport *t;

	if (!rdma_frwr_is_supported(&new_cm_id->device->attrs)) {
		ksmbd_debug(RDMA,
			    "Fast Registration Work Requests are not supported. device capabilities=%llx\n",
			    new_cm_id->device->attrs.device_cap_flags);
		return -EPROTONOSUPPORT;
	}

	t = alloc_transport(new_cm_id);
	if (!t)
		return -ENOMEM;

	KSMBD_TRANS(t)->handler = kthread_run(ksmbd_conn_handler_loop,
					      KSMBD_TRANS(t)->conn, "ksmbd:r%u",
					      SMB_DIRECT_PORT);
	if (IS_ERR(KSMBD_TRANS(t)->handler)) {
		int ret = PTR_ERR(KSMBD_TRANS(t)->handler);

		pr_err("Can't start thread\n");
		free_transport(t);
		return ret;
	}

	return 0;
}

static int smb_direct_listen_handler(struct rdma_cm_id *cm_id,
				     struct rdma_cm_event *event)
{
	switch (event->event) {
	case RDMA_CM_EVENT_CONNECT_REQUEST: {
		int ret = smb_direct_handle_connect_request(cm_id);

		if (ret) {
			pr_err("Can't create transport: %d\n", ret);
			return ret;
		}

		ksmbd_debug(RDMA, "Received connection request. cm_id=%p\n",
			    cm_id);
		break;
	}
	default:
		pr_err("Unexpected listen event. cm_id=%p, event=%s (%d)\n",
		       cm_id, rdma_event_msg(event->event), event->event);
		break;
	}
	return 0;
}

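/*
 * Create the listening cm_id bound to INADDR_ANY:@port and start
 * listening for SMB_DIRECT connection requests.
 */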
static int smb_direct_listen(int port)
{
	int ret;
	struct rdma_cm_id *cm_id;
	struct sockaddr_in sin = {
		.sin_family		= AF_INET,
		.sin_addr.s_addr	= htonl(INADDR_ANY),
		.sin_port		= htons(port),
	};

	cm_id = rdma_create_id(&init_net, smb_direct_listen_handler,
			       &smb_direct_listener, RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(cm_id)) {
		pr_err("Can't create cm id: %ld\n", PTR_ERR(cm_id));
		return PTR_ERR(cm_id);
	}

	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
	if (ret) {
		pr_err("Can't bind: %d\n", ret);
		goto err;
	}

	smb_direct_listener.cm_id = cm_id;

	ret = rdma_listen(cm_id, 10);
	if (ret) {
		pr_err("Can't listen: %d\n", ret);
		goto err;
	}
	return 0;
err:
	smb_direct_listener.cm_id = NULL;
	rdma_destroy_id(cm_id);
	return ret;
}

int ksmbd_rdma_init(void)
{
	int ret;

	smb_direct_listener.cm_id = NULL;

	/*
	 * When a client is running out of send credits, the credits are
	 * granted by the server's sending a packet using this queue.
	 * This avoids the situation where a client cannot send packets
	 * for lack of credits.
	 */
	smb_direct_wq = alloc_workqueue("ksmbd-smb_direct-wq",
					WQ_HIGHPRI | WQ_MEM_RECLAIM, 0);
	if (!smb_direct_wq)
		return -ENOMEM;

	ret = smb_direct_listen(SMB_DIRECT_PORT);
	if (ret) {
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
		pr_err("Can't listen: %d\n", ret);
		return ret;
	}

	ksmbd_debug(RDMA, "init RDMA listener. cm_id=%p\n",
		    smb_direct_listener.cm_id);
	return 0;
}

int ksmbd_rdma_destroy(void)
{
	if (smb_direct_listener.cm_id)
		rdma_destroy_id(smb_direct_listener.cm_id);
	smb_direct_listener.cm_id = NULL;

	if (smb_direct_wq) {
		flush_workqueue(smb_direct_wq);
		destroy_workqueue(smb_direct_wq);
		smb_direct_wq = NULL;
	}
	return 0;
}

static struct ksmbd_transport_ops ksmbd_smb_direct_transport_ops = {
	.prepare	= smb_direct_prepare,
	.disconnect	= smb_direct_disconnect,
	.writev		= smb_direct_writev,
	.read		= smb_direct_read,
	.rdma_read	= smb_direct_rdma_read,
	.rdma_write	= smb_direct_rdma_write,
};