// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "dr_types.h"

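/* Send-ring tuning: QUEUE_SIZE is the number of send WRs on the RC QP,
 * a completion is requested once every (QUEUE_SIZE / SIGNAL_PER_DIV_QUEUE)
 * posted WQEs, and TH_NUMS_TO_DRAIN controls when dr_handle_pending_wc()
 * switches from consuming a single completion to fully draining the CQ.
 */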
#define QUEUE_SIZE 128
#define SIGNAL_PER_DIV_QUEUE 16
#define TH_NUMS_TO_DRAIN 2

enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };

struct dr_data_seg {
        u64 addr;
        u32 length;
        u32 lkey;
        unsigned int send_flags;
};

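/* A single ICM post: an RDMA WRITE described by 'write' targeting
 * remote_addr/rkey, followed by an RDMA READ described by 'read' which
 * dr_fill_data_segs() points back at the same buffer via the send-ring MR.
 */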
struct postsend_info {
        struct dr_data_seg write;
        struct dr_data_seg read;
        u64 remote_addr;
        u32 rkey;
};

struct dr_qp_rtr_attr {
        struct mlx5dr_cmd_gid_attr dgid_attr;
        enum ib_mtu mtu;
        u32 qp_num;
        u16 port_num;
        u8 min_rnr_timer;
        u8 sgid_index;
        u16 udp_src_port;
};

struct dr_qp_rts_attr {
        u8 timeout;
        u8 retry_cnt;
        u8 rnr_retry;
};

struct dr_qp_init_attr {
        u32 cqn;
        u32 pdn;
        u32 max_send_wr;
        struct mlx5_uars_page *uar;
};

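/* Consume one CQE. For successful requester completions (and requester
 * errors) the SQ consumer counter is advanced past the WQE that generated
 * the CQE, using the wqe_head[] bookkeeping filled by dr_rdma_segments();
 * only successful completions return CQ_OK.
 */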
static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
{
        unsigned int idx;
        u8 opcode;

        opcode = get_cqe_opcode(cqe64);
        if (opcode == MLX5_CQE_REQ_ERR) {
                idx = be16_to_cpu(cqe64->wqe_counter) &
                        (dr_cq->qp->sq.wqe_cnt - 1);
                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
        } else if (opcode == MLX5_CQE_RESP_ERR) {
                ++dr_cq->qp->sq.cc;
        } else {
                idx = be16_to_cpu(cqe64->wqe_counter) &
                        (dr_cq->qp->sq.wqe_cnt - 1);
                dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;

                return CQ_OK;
        }

        return CQ_POLL_ERR;
}

static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
{
        struct mlx5_cqe64 *cqe64;
        int err;

        cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
        if (!cqe64)
                return CQ_EMPTY;

        mlx5_cqwq_pop(&dr_cq->wq);
        err = dr_parse_cqe(dr_cq, cqe64);
        mlx5_cqwq_update_db_record(&dr_cq->wq);

        return err;
}

static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
{
        int npolled;
        int err = 0;

        for (npolled = 0; npolled < ne; ++npolled) {
                err = dr_cq_poll_one(dr_cq);
                if (err != CQ_OK)
                        break;
        }

        return err == CQ_POLL_ERR ? err : npolled;
}

static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
{
        pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
}

static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
                                         struct dr_qp_init_attr *attr)
{
        u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
        struct mlx5_wq_param wqp;
        struct mlx5dr_qp *dr_qp;
        int inlen;
        void *qpc;
        void *in;
        int err;

        dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
        if (!dr_qp)
                return NULL;

        wqp.buf_numa_node = mdev->priv.numa_node;
        wqp.db_numa_node = mdev->priv.numa_node;

        dr_qp->rq.pc = 0;
        dr_qp->rq.cc = 0;
        dr_qp->rq.wqe_cnt = 4;
        dr_qp->sq.pc = 0;
        dr_qp->sq.cc = 0;
        dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);

        MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
        MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
        MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
        err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
                                &dr_qp->wq_ctrl);
        if (err) {
                mlx5_core_info(mdev, "Can't create QP WQ\n");
                goto err_wq;
        }

        dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
                                     sizeof(dr_qp->sq.wqe_head[0]),
                                     GFP_KERNEL);

        if (!dr_qp->sq.wqe_head) {
                mlx5_core_warn(mdev, "Can't allocate wqe head\n");
                goto err_wqe_head;
        }

        inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
                MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
                dr_qp->wq_ctrl.buf.npages;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
                goto err_in;
        }

        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, pd, attr->pdn);
        MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
        MLX5_SET(qpc, qpc, log_page_size,
                 dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET(qpc, qpc, fre, 1);
        MLX5_SET(qpc, qpc, rlky, 1);
        MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
        MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
        MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
        MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
        MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
        MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
        if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
                MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
        mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
                                  (__be64 *)MLX5_ADDR_OF(create_qp_in,
                                                         in, pas));
        err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
        kvfree(in);

        if (err) {
                mlx5_core_warn(mdev, "Can't create QP\n");
                goto err_in;
        }
        dr_qp->mqp.event = dr_qp_event;
        dr_qp->uar = attr->uar;

        return dr_qp;

err_in:
        kfree(dr_qp->sq.wqe_head);
err_wqe_head:
        mlx5_wq_destroy(&dr_qp->wq_ctrl);
err_wq:
        kfree(dr_qp);
        return NULL;
}

static void dr_destroy_qp(struct mlx5_core_dev *mdev,
                          struct mlx5dr_qp *dr_qp)
{
        mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
        kfree(dr_qp->sq.wqe_head);
        mlx5_wq_destroy(&dr_qp->wq_ctrl);
        kfree(dr_qp);
}

static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
{
        dma_wmb();
        *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xfffff);

        /* After wmb() the hw is aware of the new work */
        wmb();

        mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
}

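/* Build a single RDMA WQE (ctrl + remote address + data segment, sized in
 * 16-byte units), record the producer counter in wqe_head[] for CQE
 * bookkeeping, and ring the doorbell only when 'nreq' is set so that a
 * WRITE + READ pair is published to HW with one doorbell.
 */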
static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
                             u32 rkey, struct dr_data_seg *data_seg,
                             u32 opcode, int nreq)
{
        struct mlx5_wqe_raddr_seg *wq_raddr;
        struct mlx5_wqe_ctrl_seg *wq_ctrl;
        struct mlx5_wqe_data_seg *wq_dseg;
        unsigned int size;
        unsigned int idx;

        size = sizeof(*wq_ctrl) / 16 + sizeof(*wq_dseg) / 16 +
                sizeof(*wq_raddr) / 16;

        idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);

        wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
        wq_ctrl->imm = 0;
        wq_ctrl->fm_ce_se = (data_seg->send_flags) ?
                MLX5_WQE_CTRL_CQ_UPDATE : 0;
        wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
                                                opcode);
        wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
        wq_raddr = (void *)(wq_ctrl + 1);
        wq_raddr->raddr = cpu_to_be64(remote_addr);
        wq_raddr->rkey = cpu_to_be32(rkey);
        wq_raddr->reserved = 0;

        wq_dseg = (void *)(wq_raddr + 1);
        wq_dseg->byte_count = cpu_to_be32(data_seg->length);
        wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
        wq_dseg->addr = cpu_to_be64(data_seg->addr);

        dr_qp->sq.wqe_head[idx] = dr_qp->sq.pc++;

        if (nreq)
                dr_cmd_notify_hw(dr_qp, wq_ctrl);
}

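/* Post the WRITE followed by the READ of one postsend_info; only the READ
 * (nreq = 1) updates the doorbell record and rings the doorbell, covering
 * both WQEs.
 */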
static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
{
        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
                         &send_info->write, MLX5_OPCODE_RDMA_WRITE, 0);
        dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
                         &send_info->read, MLX5_OPCODE_RDMA_READ, 1);
}

/**
 * mlx5dr_send_fill_and_append_ste_send_info: Add data to be sent
 * with the send_list parameters:
 *
 * @ste:       The STE this data is attached to
 * @size:      Size of the data to write
 * @offset:    Offset of the data from the start of the hw_ste entry
 * @data:      Data to write
 * @ste_info:  STE info to be sent with send_list
 * @send_list: The list to append @ste_info to
 * @copy_data: If true, the data is copied aside because it is not backed
 *             up anywhere else (e.g. during re-hash); if false, the data
 *             may still be updated after it was added to the list.
 */
void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
                                               u16 offset, u8 *data,
                                               struct mlx5dr_ste_send_info *ste_info,
                                               struct list_head *send_list,
                                               bool copy_data)
{
        ste_info->size = size;
        ste_info->ste = ste;
        ste_info->offset = offset;

        if (copy_data) {
                memcpy(ste_info->data_cont, data, size);
                ste_info->data = ste_info->data_cont;
        } else {
                ste_info->data = data;
        }

        list_add_tail(&ste_info->send_list, send_list);
}

/* The function tries to consume one wc each time, unless the queue is full.
 * In that case, which means that the hw is behind the sw by a full queue
 * length, the function will drain the cq until it is empty.
 */
static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
                                struct mlx5dr_send_ring *send_ring)
{
        bool is_drain = false;
        int ne;

        if (send_ring->pending_wqe < send_ring->signal_th)
                return 0;

        /* Queue is full, start draining it */
        if (send_ring->pending_wqe >=
            dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
                is_drain = true;

        do {
                ne = dr_poll_cq(send_ring->cq, 1);
                if (ne < 0)
                        return ne;
                else if (ne == 1)
                        send_ring->pending_wqe -= send_ring->signal_th;
        } while (is_drain && send_ring->pending_wqe);

        return 0;
}

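/* Each post accounts for two WQEs (WRITE + READ). A completion is requested
 * (IB_SEND_SIGNALED) once every signal_th posted WQEs, and the READ is
 * pointed back at the write buffer through the send-ring MR, so its
 * completion also implies the preceding WRITE was executed.
 */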
static void dr_fill_data_segs(struct mlx5dr_send_ring *send_ring,
                              struct postsend_info *send_info)
{
        send_ring->pending_wqe++;

        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->write.send_flags |= IB_SEND_SIGNALED;

        send_ring->pending_wqe++;
        send_info->read.length = send_info->write.length;
        /* Read into the same write area */
        send_info->read.addr = (uintptr_t)send_info->write.addr;
        send_info->read.lkey = send_ring->mr->mkey.key;

        if (send_ring->pending_wqe % send_ring->signal_th == 0)
                send_info->read.send_flags = IB_SEND_SIGNALED;
        else
                send_info->read.send_flags = 0;
}

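/* Post one ICM update. If the payload exceeds the domain's max inline size,
 * it is first staged in the pre-registered send-ring buffer (one
 * max_post_send_size slot per entry in the signal_th window) so the HCA can
 * DMA it directly.
 */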
static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
                                struct postsend_info *send_info)
{
        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
        u32 buff_offset;
        int ret;

        ret = dr_handle_pending_wc(dmn, send_ring);
        if (ret)
                return ret;

        if (send_info->write.length > dmn->info.max_inline_size) {
                buff_offset = (send_ring->tx_head &
                               (dmn->send_ring->signal_th - 1)) *
                              send_ring->max_post_send_size;
                /* Copy to ring mr */
                memcpy(send_ring->buf + buff_offset,
                       (void *)(uintptr_t)send_info->write.addr,
                       send_info->write.length);
                send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
                send_info->write.lkey = send_ring->mr->mkey.key;
        }

        send_ring->tx_head++;
        dr_fill_data_segs(send_ring, send_info);
        dr_post_send(send_ring->qp, send_info);

        return 0;
}

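/* Work out how to push a whole STE hash table to ICM: chunks larger than
 * max_post_send_size are split into equally sized iterations, and a bounce
 * buffer big enough for a single iteration is allocated.
 */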
static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
                                   struct mlx5dr_ste_htbl *htbl,
                                   u8 **data,
                                   u32 *byte_size,
                                   int *iterations,
                                   int *num_stes)
{
        int alloc_size;

        if (htbl->chunk->byte_size > dmn->send_ring->max_post_send_size) {
                *iterations = htbl->chunk->byte_size /
                        dmn->send_ring->max_post_send_size;
                *byte_size = dmn->send_ring->max_post_send_size;
                alloc_size = *byte_size;
                *num_stes = *byte_size / DR_STE_SIZE;
        } else {
                *iterations = 1;
                *num_stes = htbl->chunk->num_of_entries;
                alloc_size = *num_stes * DR_STE_SIZE;
        }

        *data = kzalloc(alloc_size, GFP_KERNEL);
        if (!*data)
                return -ENOMEM;

        return 0;
}

/**
 * mlx5dr_send_postsend_ste: write size bytes into offset from the hw icm.
 *
 * @dmn:    Domain
 * @ste:    The ste struct that contains the data (at
 *          least part of it)
 * @data:   The real data to send
 * @size:   Number of bytes to write
 * @offset: The offset from the icm mapped data to
 *          start writing to; used to write only part
 *          of the buffer
 *
 * Return: 0 on success.
 */
int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
                             u8 *data, u16 size, u16 offset)
{
        struct postsend_info send_info = {};

        send_info.write.addr = (uintptr_t)data;
        send_info.write.length = size;
        send_info.write.lkey = 0;
        send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
        send_info.rkey = ste->htbl->chunk->rkey;

        return dr_postsend_icm_data(dmn, &send_info);
}

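/* Write an entire hash table to ICM. Invalid entries are written as the
 * given formatted (default) STE, while valid entries are written as their
 * reduced hw_ste copy followed by the table's bit_mask.
 */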
int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
                              struct mlx5dr_ste_htbl *htbl,
                              u8 *formatted_ste, u8 *mask)
{
        u32 byte_size = htbl->chunk->byte_size;
        int num_stes_per_iter;
        int iterations;
        u8 *data;
        int ret;
        int i;
        int j;

        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
                                      &iterations, &num_stes_per_iter);
        if (ret)
                return ret;

        /* Send the data 'iterations' times */
        for (i = 0; i < iterations; i++) {
                u32 ste_index = i * (byte_size / DR_STE_SIZE);
                struct postsend_info send_info = {};

                /* Copy all the STEs into the data buffer,
                 * adding the bit_mask where needed
                 */
                for (j = 0; j < num_stes_per_iter; j++) {
                        u8 *hw_ste = htbl->ste_arr[ste_index + j].hw_ste;
                        u32 ste_off = j * DR_STE_SIZE;

                        if (mlx5dr_ste_is_not_valid_entry(hw_ste)) {
                                memcpy(data + ste_off,
                                       formatted_ste, DR_STE_SIZE);
                        } else {
                                /* Copy data */
                                memcpy(data + ste_off,
                                       htbl->ste_arr[ste_index + j].hw_ste,
                                       DR_STE_SIZE_REDUCED);
                                /* Copy bit_mask */
                                memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
                                       mask, DR_STE_SIZE_MASK);
                        }
                }

                send_info.write.addr = (uintptr_t)data;
                send_info.write.length = byte_size;
                send_info.write.lkey = 0;
                send_info.remote_addr =
                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
                send_info.rkey = htbl->chunk->rkey;

                ret = dr_postsend_icm_data(dmn, &send_info);
                if (ret)
                        goto out_free;
        }

out_free:
        kfree(data);
        return ret;
}

/* Initialize htbl with default STEs */
int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
                                        struct mlx5dr_ste_htbl *htbl,
                                        u8 *ste_init_data,
                                        bool update_hw_ste)
{
        u32 byte_size = htbl->chunk->byte_size;
        int iterations;
        int num_stes;
        u8 *data;
        int ret;
        int i;

        ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
                                      &iterations, &num_stes);
        if (ret)
                return ret;

        for (i = 0; i < num_stes; i++) {
                u8 *copy_dst;

                /* Copy the same ste into the data buffer */
                copy_dst = data + i * DR_STE_SIZE;
                memcpy(copy_dst, ste_init_data, DR_STE_SIZE);

                if (update_hw_ste) {
                        /* Copy the reduced ste to the hash table ste_arr */
                        copy_dst = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
                        memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
                }
        }

        /* Send the data 'iterations' times */
        for (i = 0; i < iterations; i++) {
                u32 ste_index = i * (byte_size / DR_STE_SIZE);
                struct postsend_info send_info = {};

                send_info.write.addr = (uintptr_t)data;
                send_info.write.length = byte_size;
                send_info.write.lkey = 0;
                send_info.remote_addr =
                        mlx5dr_ste_get_mr_addr(htbl->ste_arr + ste_index);
                send_info.rkey = htbl->chunk->rkey;

                ret = dr_postsend_icm_data(dmn, &send_info);
                if (ret)
                        goto out_free;
        }

out_free:
        kfree(data);
        return ret;
}

int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
                                struct mlx5dr_action *action)
{
        struct postsend_info send_info = {};
        int ret;

        send_info.write.addr = (uintptr_t)action->rewrite.data;
        send_info.write.length = action->rewrite.chunk->byte_size;
        send_info.write.lkey = 0;
        send_info.remote_addr = action->rewrite.chunk->mr_addr;
        send_info.rkey = action->rewrite.chunk->rkey;

        mutex_lock(&dmn->mutex);
        ret = dr_postsend_icm_data(dmn, &send_info);
        mutex_unlock(&dmn->mutex);

        return ret;
}

static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
                                 struct mlx5dr_qp *dr_qp,
                                 int port)
{
        u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
        void *qpc;

        qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);

        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
        MLX5_SET(qpc, qpc, rre, 1);
        MLX5_SET(qpc, qpc, rwe, 1);

        return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
                                   &dr_qp->mqp);
}

static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
                                    struct mlx5dr_qp *dr_qp,
                                    struct dr_qp_rts_attr *attr)
{
        u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
        void *qpc;

        qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);

        MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);

        MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
        MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
        MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);

        return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
                                   &dr_qp->mqp);
}

static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
                                     struct mlx5dr_qp *dr_qp,
                                     struct dr_qp_rtr_attr *attr)
{
        u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
        void *qpc;

        qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);

        MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);

        MLX5_SET(qpc, qpc, mtu, attr->mtu);
        MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
        MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
               attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
        memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
               attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
        MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
                 attr->sgid_index);

        if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
                MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
                         attr->udp_src_port);

        MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
        MLX5_SET(qpc, qpc, min_rnr_nak, 1);

        return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
                                   &dr_qp->mqp);
}

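/* Drive the send QP through RST -> INIT -> RTR -> RTS. The QP is connected
 * in loopback: the remote QP number programmed in the RTR attributes is the
 * QP's own number, using GID index 0 on port 1.
 */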
static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
{
        struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
        struct dr_qp_rts_attr rts_attr = {};
        struct dr_qp_rtr_attr rtr_attr = {};
        enum ib_mtu mtu = IB_MTU_1024;
        u16 gid_index = 0;
        int port = 1;
        int ret;

        /* Init */
        ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
        if (ret)
                return ret;

        /* RTR */
        ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index, &rtr_attr.dgid_attr);
        if (ret)
                return ret;

        rtr_attr.mtu = mtu;
        rtr_attr.qp_num = dr_qp->mqp.qpn;
        rtr_attr.min_rnr_timer = 12;
        rtr_attr.port_num = port;
        rtr_attr.sgid_index = gid_index;
        rtr_attr.udp_src_port = dmn->info.caps.roce_min_src_udp;

        ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
        if (ret)
                return ret;

        /* RTS */
        rts_attr.timeout = 14;
        rts_attr.retry_cnt = 7;
        rts_attr.rnr_retry = 7;

        ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
        if (ret)
                return ret;

        return 0;
}

static void dr_cq_event(struct mlx5_core_cq *mcq,
                        enum mlx5_event event)
{
        pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
}

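/* Create a CQ that is only ever polled (see dr_poll_cq()): all CQEs are
 * initialized as invalid/HW-owned, the doorbell records are zeroed, and no
 * completion handler is installed since the CQ is never armed here.
 */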
static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
                                      struct mlx5_uars_page *uar,
                                      size_t ncqe)
{
        u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
        u32 out[MLX5_ST_SZ_DW(create_cq_out)];
        struct mlx5_wq_param wqp;
        struct mlx5_cqe64 *cqe;
        struct mlx5dr_cq *cq;
        int inlen, err, eqn;
        unsigned int irqn;
        void *cqc, *in;
        __be64 *pas;
        int vector;
        u32 i;

        cq = kzalloc(sizeof(*cq), GFP_KERNEL);
        if (!cq)
                return NULL;

        ncqe = roundup_pow_of_two(ncqe);
        MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));

        wqp.buf_numa_node = mdev->priv.numa_node;
        wqp.db_numa_node = mdev->priv.numa_node;

        err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
                               &cq->wq_ctrl);
        if (err)
                goto out;

        for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
                cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
                cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
        }

        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
                sizeof(u64) * cq->wq_ctrl.buf.npages;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                goto err_cqwq;

        vector = smp_processor_id() % mlx5_comp_vectors_count(mdev);
        err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn);
        if (err) {
                kvfree(in);
                goto err_cqwq;
        }

        cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
        MLX5_SET(cqc, cqc, c_eqn, eqn);
        MLX5_SET(cqc, cqc, uar_page, uar->index);
        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                 MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);

        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
        mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);

        cq->mcq.event = dr_cq_event;

        err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
        kvfree(in);

        if (err)
                goto err_cqwq;

        cq->mcq.cqe_sz = 64;
        cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
        cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
        *cq->mcq.set_ci_db = 0;
        *cq->mcq.arm_db = 0;
        cq->mcq.vector = 0;
        cq->mcq.irqn = irqn;
        cq->mcq.uar = uar;

        return cq;

err_cqwq:
        mlx5_wq_destroy(&cq->wq_ctrl);
out:
        kfree(cq);
        return NULL;
}

static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
{
        mlx5_core_destroy_cq(mdev, &cq->mcq);
        mlx5_wq_destroy(&cq->wq_ctrl);
        kfree(cq);
}

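/* The send ring uses a single physical-address (PA) mkey on the given PD
 * with length64 set, so the DMA addresses of the ring buffers can be used
 * with it directly, without a translation table.
 */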
static int
dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey)
{
        u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
        void *mkc;

        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
        MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
        MLX5_SET(mkc, mkc, a, 1);
        MLX5_SET(mkc, mkc, rw, 1);
        MLX5_SET(mkc, mkc, rr, 1);
        MLX5_SET(mkc, mkc, lw, 1);
        MLX5_SET(mkc, mkc, lr, 1);

        MLX5_SET(mkc, mkc, pd, pdn);
        MLX5_SET(mkc, mkc, length64, 1);
        MLX5_SET(mkc, mkc, qpn, 0xffffff);

        return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
}

static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
                                   u32 pdn, void *buf, size_t size)
{
        struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        struct device *dma_device;
        dma_addr_t dma_addr;
        int err;

        if (!mr)
                return NULL;

        dma_device = &mdev->pdev->dev;
        dma_addr = dma_map_single(dma_device, buf, size,
                                  DMA_BIDIRECTIONAL);
        err = dma_mapping_error(dma_device, dma_addr);
        if (err) {
                mlx5_core_warn(mdev, "Can't dma buf\n");
                kfree(mr);
                return NULL;
        }

        err = dr_create_mkey(mdev, pdn, &mr->mkey);
        if (err) {
                mlx5_core_warn(mdev, "Can't create mkey\n");
                dma_unmap_single(dma_device, dma_addr, size,
                                 DMA_BIDIRECTIONAL);
                kfree(mr);
                return NULL;
        }

        mr->dma_addr = dma_addr;
        mr->size = size;
        mr->addr = buf;

        return mr;
}

static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
{
        mlx5_core_destroy_mkey(mdev, &mr->mkey);
        dma_unmap_single(&mdev->pdev->dev, mr->dma_addr, mr->size,
                         DMA_BIDIRECTIONAL);
        kfree(mr);
}

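/* Set up the per-domain send ring: a polled CQ, an RC loopback QP with
 * QUEUE_SIZE send WRs, a bounce buffer of signal_th * max_post_send_size
 * bytes registered as 'mr', and a small 'sync_mr' used by the force-drain
 * flow.
 */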
int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
{
        struct dr_qp_init_attr init_attr = {};
        int cq_size;
        int size;
        int ret;

        dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
        if (!dmn->send_ring)
                return -ENOMEM;

        cq_size = QUEUE_SIZE + 1;
        dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
        if (!dmn->send_ring->cq) {
                ret = -ENOMEM;
                goto free_send_ring;
        }

        init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
        init_attr.pdn = dmn->pdn;
        init_attr.uar = dmn->uar;
        init_attr.max_send_wr = QUEUE_SIZE;

        dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
        if (!dmn->send_ring->qp) {
                ret = -ENOMEM;
                goto clean_cq;
        }

        dmn->send_ring->cq->qp = dmn->send_ring->qp;

        dmn->info.max_send_wr = QUEUE_SIZE;
        dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
                                        DR_STE_SIZE);

        dmn->send_ring->signal_th = dmn->info.max_send_wr /
                SIGNAL_PER_DIV_QUEUE;

        /* Prepare qp to be used */
        ret = dr_prepare_qp_to_rts(dmn);
        if (ret)
                goto clean_qp;

        dmn->send_ring->max_post_send_size =
                mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
                                                   DR_ICM_TYPE_STE);

        /* Allocating the max size as a buffer for writing */
        size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
        dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
        if (!dmn->send_ring->buf) {
                ret = -ENOMEM;
                goto clean_qp;
        }

        dmn->send_ring->buf_size = size;

        dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
                                       dmn->pdn, dmn->send_ring->buf, size);
        if (!dmn->send_ring->mr) {
                ret = -ENOMEM;
                goto free_mem;
        }

        dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
                                            dmn->pdn, dmn->send_ring->sync_buff,
                                            MIN_READ_SYNC);
        if (!dmn->send_ring->sync_mr) {
                ret = -ENOMEM;
                goto clean_mr;
        }

        return 0;

clean_mr:
        dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
free_mem:
        kfree(dmn->send_ring->buf);
clean_qp:
        dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
clean_cq:
        dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
free_send_ring:
        kfree(dmn->send_ring);

        return ret;
}

void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
                           struct mlx5dr_send_ring *send_ring)
{
        dr_destroy_qp(dmn->mdev, send_ring->qp);
        dr_destroy_cq(dmn->mdev, send_ring->cq);
        dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
        dr_dereg_mr(dmn->mdev, send_ring->mr);
        kfree(send_ring->buf);
        kfree(send_ring);
}

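/* Flush the send ring by posting enough dummy WRITE/READ pairs against the
 * sync_mr to push pending_wqe past the drain threshold, then drain the CQ
 * through dr_handle_pending_wc().
 */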
int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
{
        struct mlx5dr_send_ring *send_ring = dmn->send_ring;
        struct postsend_info send_info = {};
        u8 data[DR_STE_SIZE];
        int num_of_sends_req;
        int ret;
        int i;

        /* Sending this number of requests makes sure we will get drain */
        num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;

        /* Send fake requests forcing the last to be signaled */
        send_info.write.addr = (uintptr_t)data;
        send_info.write.length = DR_STE_SIZE;
        send_info.write.lkey = 0;
        /* Using the sync_mr in order to write/read */
        send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
        send_info.rkey = send_ring->sync_mr->mkey.key;

        for (i = 0; i < num_of_sends_req; i++) {
                ret = dr_postsend_icm_data(dmn, &send_info);
                if (ret)
                        return ret;
        }

        ret = dr_handle_pending_wc(dmn, send_ring);

        return ret;
}