1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/* Copyright (c) 2020 Mellanox Technologies Ltd. */
3
4#include <linux/module.h>
5#include <linux/vdpa.h>
6#include <linux/vringh.h>
7#include <uapi/linux/virtio_net.h>
8#include <uapi/linux/virtio_ids.h>
9#include <uapi/linux/vdpa.h>
10#include <linux/virtio_config.h>
11#include <linux/auxiliary_bus.h>
12#include <linux/mlx5/cq.h>
13#include <linux/mlx5/qp.h>
14#include <linux/mlx5/device.h>
15#include <linux/mlx5/driver.h>
16#include <linux/mlx5/vport.h>
17#include <linux/mlx5/fs.h>
18#include <linux/mlx5/mlx5_ifc_vdpa.h>
19#include <linux/mlx5/mpfs.h>
20#include "mlx5_vdpa.h"
21#include "mlx5_vnet.h"
22
23MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
24MODULE_DESCRIPTION("Mellanox VDPA driver");
25MODULE_LICENSE("Dual BSD/GPL");
26
27#define VALID_FEATURES_MASK \
28 (BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
29 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
30 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
31 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
32 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
33 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
34 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
35 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
36 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
37 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
38 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
39 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
40 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
41
42#define VALID_STATUS_MASK \
43 (VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
44 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
45
46#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
47
48#define MLX5V_UNTAGGED 0x1000
49
50struct mlx5_vdpa_cq_buf {
51 struct mlx5_frag_buf_ctrl fbc;
52 struct mlx5_frag_buf frag_buf;
53 int cqe_size;
54 int nent;
55};
56
57struct mlx5_vdpa_cq {
58 struct mlx5_core_cq mcq;
59 struct mlx5_vdpa_cq_buf buf;
60 struct mlx5_db db;
61 int cqe;
62};
63
64struct mlx5_vdpa_umem {
65 struct mlx5_frag_buf_ctrl fbc;
66 struct mlx5_frag_buf frag_buf;
67 int size;
68 u32 id;
69};
70
71struct mlx5_vdpa_qp {
72 struct mlx5_core_qp mqp;
73 struct mlx5_frag_buf frag_buf;
74 struct mlx5_db db;
75 u16 head;
76 bool fw;
77};
78
79struct mlx5_vq_restore_info {
80 u32 num_ent;
81 u64 desc_addr;
82 u64 device_addr;
83 u64 driver_addr;
84 u16 avail_index;
85 u16 used_index;
86 bool ready;
87 bool restore;
88};
89
90struct mlx5_vdpa_virtqueue {
91 bool ready;
92 u64 desc_addr;
93 u64 device_addr;
94 u64 driver_addr;
95 u32 num_ent;
96
97 /* Resources for implementing the notification channel from the device
98 * to the driver. fwqp is the firmware end of an RC connection; the
99 * other end is vqqp used by the driver. cq is where completions are
100 * reported.
101 */
102 struct mlx5_vdpa_cq cq;
103 struct mlx5_vdpa_qp fwqp;
104 struct mlx5_vdpa_qp vqqp;
105
106 /* umem resources are required for the virtqueue operation. Their use
107 * is internal and they must be provided by the driver.
108 */
109 struct mlx5_vdpa_umem umem1;
110 struct mlx5_vdpa_umem umem2;
111 struct mlx5_vdpa_umem umem3;
112
113 u32 counter_set_id;
114 bool initialized;
115 int index;
116 u32 virtq_id;
117 struct mlx5_vdpa_net *ndev;
118 u16 avail_idx;
119 u16 used_idx;
120 int fw_state;
121
122 /* keep last in the struct */
123 struct mlx5_vq_restore_info ri;
124};
125
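/* A virtqueue index is valid only within the range implied by the negotiated
 * features: without VIRTIO_NET_F_MQ only the first data queue pair (plus the
 * control VQ when VIRTIO_NET_F_CTRL_VQ was negotiated) may be addressed.
 */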
126static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
127{
128 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
129 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
130 return idx < 2;
131 else
132 return idx < 3;
133 }
134
135 return idx <= mvdev->max_idx;
136}
137
138static void free_resources(struct mlx5_vdpa_net *ndev);
139static void init_mvqs(struct mlx5_vdpa_net *ndev);
140static int setup_driver(struct mlx5_vdpa_dev *mvdev);
141static void teardown_driver(struct mlx5_vdpa_net *ndev);
142
143static bool mlx5_vdpa_debug;
144
145#define MLX5_CVQ_MAX_ENT 16
146
147#define MLX5_LOG_VIO_FLAG(_feature) \
148 do { \
149 if (features & BIT_ULL(_feature)) \
150 mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
151 } while (0)
152
153#define MLX5_LOG_VIO_STAT(_status) \
154 do { \
155 if (status & (_status)) \
156 mlx5_vdpa_info(mvdev, "%s\n", #_status); \
157 } while (0)
158
159/* TODO: cross-endian support */
160static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
161{
162 return virtio_legacy_is_little_endian() ||
163 (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
164}
165
166static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
167{
168 return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
169}
170
171static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
172{
173 return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
174}
175
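/* The control VQ always follows the data virtqueues: index 2 when
 * VIRTIO_NET_F_MQ is not negotiated, otherwise the index right after the
 * last data virtqueue.
 */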
176static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
177{
178 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
179 return 2;
180
181 return mvdev->max_vqs;
182}
183
184static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
185{
186 return idx == ctrl_vq_idx(mvdev);
187}
188
189static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
190{
191 if (status & ~VALID_STATUS_MASK)
192 mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
193 status & ~VALID_STATUS_MASK);
194
195 if (!mlx5_vdpa_debug)
196 return;
197
198 mlx5_vdpa_info(mvdev, "driver status %s", set ? "set" : "get");
199 if (set && !status) {
200 mlx5_vdpa_info(mvdev, "driver resets the device\n");
201 return;
202 }
203
204 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
205 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
206 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
207 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
208 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
209 MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
210}
211
212static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
213{
214 if (features & ~VALID_FEATURES_MASK)
215 mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
216 features & ~VALID_FEATURES_MASK);
217
218 if (!mlx5_vdpa_debug)
219 return;
220
221 mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
222 if (!features)
223 mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
224
225 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
226 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
227 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
228 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
229 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
230 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
231 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
232 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
233 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
234 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
235 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
236 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
237 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
238 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
239 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
240 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
241 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
242 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
243 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
244 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
245 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
246 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
247 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
248 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
249 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
250 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
251 MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
252 MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
253 MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
254 MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
255 MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
256 MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
257 MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
258 MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
259}
260
261static int create_tis(struct mlx5_vdpa_net *ndev)
262{
263 struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
264 u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
265 void *tisc;
266 int err;
267
268 tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
269 MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
270 err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
271 if (err)
272 mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
273
274 return err;
275}
276
277static void destroy_tis(struct mlx5_vdpa_net *ndev)
278{
279 mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
280}
281
282#define MLX5_VDPA_CQE_SIZE 64
283#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
284
285static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
286{
287 struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
288 u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
289 u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
290 int err;
291
292 err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
293 ndev->mvdev.mdev->priv.numa_node);
294 if (err)
295 return err;
296
297 mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
298
299 buf->cqe_size = MLX5_VDPA_CQE_SIZE;
300 buf->nent = nent;
301
302 return 0;
303}
304
305static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
306{
307 struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
308
309 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
310 ndev->mvdev.mdev->priv.numa_node);
311}
312
313static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
314{
315 mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
316}
317
318static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
319{
320 return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
321}
322
323static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
324{
325 struct mlx5_cqe64 *cqe64;
326 void *cqe;
327 int i;
328
329 for (i = 0; i < buf->nent; i++) {
330 cqe = get_cqe(vcq, i);
331 cqe64 = cqe;
332 cqe64->op_own = MLX5_CQE_INVALID << 4;
333 }
334}
335
336static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
337{
338 struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
339
340 if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
341 !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
342 return cqe64;
343
344 return NULL;
345}
346
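/* Advance the receive queue head by n entries and publish it through the
 * doorbell record, making room for more completion messages from the
 * firmware side of the notification channel.
 */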
347static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
348{
349 vqp->head += n;
350 vqp->db.db[0] = cpu_to_be32(vqp->head);
351}
352
353static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
354 struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
355{
356 struct mlx5_vdpa_qp *vqp;
357 __be64 *pas;
358 void *qpc;
359
360 vqp = fw ? &mvq->fwqp : &mvq->vqqp;
361 MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
362 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
363 if (vqp->fw) {
364 /* Firmware QP is allocated by the driver for the firmware's
365 * use so we can skip part of the params as they will be chosen by firmware
366 */
367 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
368 MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
369 MLX5_SET(qpc, qpc, no_sq, 1);
370 return;
371 }
372
373 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
374 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
375 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
376 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
377 MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
378 MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
379 MLX5_SET(qpc, qpc, no_sq, 1);
380 MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
381 MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
382 MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
383 pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
384 mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
385}
386
387static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
388{
389 return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
390 num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
391 ndev->mvdev.mdev->priv.numa_node);
392}
393
394static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
395{
396 mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
397}
398
399static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
400 struct mlx5_vdpa_qp *vqp)
401{
402 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
403 int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
404 u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
405 void *qpc;
406 void *in;
407 int err;
408
409 if (!vqp->fw) {
410 vqp = &mvq->vqqp;
411 err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
412 if (err)
413 return err;
414
415 err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
416 if (err)
417 goto err_db;
418 inlen += vqp->frag_buf.npages * sizeof(__be64);
419 }
420
421 in = kzalloc(inlen, GFP_KERNEL);
422 if (!in) {
423 err = -ENOMEM;
424 goto err_kzalloc;
425 }
426
427 qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
428 qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
429 MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
430 MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
431 MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
432 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
433 if (!vqp->fw)
434 MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
435 MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
436 err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
437 kfree(in);
438 if (err)
439 goto err_kzalloc;
440
441 vqp->mqp.uid = ndev->mvdev.res.uid;
442 vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
443
444 if (!vqp->fw)
445 rx_post(vqp, mvq->num_ent);
446
447 return 0;
448
449err_kzalloc:
450 if (!vqp->fw)
451 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
452err_db:
453 if (!vqp->fw)
454 rq_buf_free(ndev, vqp);
455
456 return err;
457}
458
459static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
460{
461 u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
462
463 MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
464 MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
465 MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
466 if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
467 mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
468 if (!vqp->fw) {
469 mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
470 rq_buf_free(ndev, vqp);
471 }
472}
473
474static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
475{
476 return get_sw_cqe(cq, cq->mcq.cons_index);
477}
478
479static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
480{
481 struct mlx5_cqe64 *cqe64;
482
483 cqe64 = next_cqe_sw(vcq);
484 if (!cqe64)
485 return -EAGAIN;
486
487 vcq->mcq.cons_index++;
488 return 0;
489}
490
491static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
492{
493 struct mlx5_vdpa_net *ndev = mvq->ndev;
494 struct vdpa_callback *event_cb;
495
496 event_cb = &ndev->event_cbs[mvq->index];
497 mlx5_cq_set_ci(&mvq->cq.mcq);
498
499 /* make sure CQ consumer update is visible to the hardware before updating
500 * RX doorbell record.
501 */
502 dma_wmb();
503 rx_post(&mvq->vqqp, num);
504 if (event_cb->callback)
505 event_cb->callback(event_cb->private);
506}
507
508static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
509{
510 struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
511 struct mlx5_vdpa_net *ndev = mvq->ndev;
512 void __iomem *uar_page = ndev->mvdev.res.uar->map;
513 int num = 0;
514
515 while (!mlx5_vdpa_poll_one(&mvq->cq)) {
516 num++;
517 if (num > mvq->num_ent / 2) {
518 /* If completions keep coming while we poll, we want to
519 * let the hardware know that we consumed them by
520 * updating the doorbell record. We also let vdpa core
521 * know about this so it passes it on to the virtio driver
522 * in the guest.
523 */
524 mlx5_vdpa_handle_completions(mvq, num);
525 num = 0;
526 }
527 }
528
529 if (num)
530 mlx5_vdpa_handle_completions(mvq, num);
531
532 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
533}
534
535static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
536{
537 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
538 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
539 void __iomem *uar_page = ndev->mvdev.res.uar->map;
540 u32 out[MLX5_ST_SZ_DW(create_cq_out)];
541 struct mlx5_vdpa_cq *vcq = &mvq->cq;
542 __be64 *pas;
543 int inlen;
544 void *cqc;
545 void *in;
546 int err;
547 int eqn;
548
549 err = mlx5_db_alloc(mdev, &vcq->db);
550 if (err)
551 return err;
552
553 vcq->mcq.set_ci_db = vcq->db.db;
554 vcq->mcq.arm_db = vcq->db.db + 1;
555 vcq->mcq.cqe_sz = 64;
556
557 err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
558 if (err)
559 goto err_db;
560
561 cq_frag_buf_init(vcq, &vcq->buf);
562
563 inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
564 MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
565 in = kzalloc(inlen, GFP_KERNEL);
566 if (!in) {
567 err = -ENOMEM;
568 goto err_vzalloc;
569 }
570
571 MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
572 pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
573 mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
574
575 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
576 MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
577
578 /* Use vector 0 by default. Consider adding code to choose least used
579 * vector.
580 */
581 err = mlx5_vector2eqn(mdev, 0, &eqn);
582 if (err)
583 goto err_vec;
584
585 cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
586 MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
587 MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
588 MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
589 MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
590
591 err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
592 if (err)
593 goto err_vec;
594
595 vcq->mcq.comp = mlx5_vdpa_cq_comp;
596 vcq->cqe = num_ent;
597 vcq->mcq.set_ci_db = vcq->db.db;
598 vcq->mcq.arm_db = vcq->db.db + 1;
599 mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
600 kfree(in);
601 return 0;
602
603err_vec:
604 kfree(in);
605err_vzalloc:
606 cq_frag_buf_free(ndev, &vcq->buf);
607err_db:
608 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
609 return err;
610}
611
612static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
613{
614 struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
615 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
616 struct mlx5_vdpa_cq *vcq = &mvq->cq;
617
618 if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
619 mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
620 return;
621 }
622 cq_frag_buf_free(ndev, &vcq->buf);
623 mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
624}
625
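/* The size of each umem required by the virtqueue object is a linear
 * function of the queue size; the coefficients are read from the device
 * VDPA emulation capabilities.
 */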
626static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
627 struct mlx5_vdpa_umem **umemp)
628{
629 struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
630 int p_a;
631 int p_b;
632
633 switch (num) {
634 case 1:
635 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_a);
636 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_1_buffer_param_b);
637 *umemp = &mvq->umem1;
638 break;
639 case 2:
640 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_a);
641 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_2_buffer_param_b);
642 *umemp = &mvq->umem2;
643 break;
644 case 3:
645 p_a = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_a);
646 p_b = MLX5_CAP_DEV_VDPA_EMULATION(mdev, umem_3_buffer_param_b);
647 *umemp = &mvq->umem3;
648 break;
649 }
650 (*umemp)->size = p_a * mvq->num_ent + p_b;
651}
652
653static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
654{
655 mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
656}
657
658static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
659{
660 int inlen;
661 u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
662 void *um;
663 void *in;
664 int err;
665 __be64 *pas;
666 struct mlx5_vdpa_umem *umem;
667
668 set_umem_size(ndev, mvq, num, &umem);
669 err = umem_frag_buf_alloc(ndev, umem, umem->size);
670 if (err)
671 return err;
672
673 inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
674
675 in = kzalloc(inlen, GFP_KERNEL);
676 if (!in) {
677 err = -ENOMEM;
678 goto err_in;
679 }
680
681 MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
682 MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
683 um = MLX5_ADDR_OF(create_umem_in, in, umem);
684 MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
685 MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
686
687 pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
688 mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
689
690 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
691 if (err) {
692 mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
693 goto err_cmd;
694 }
695
696 kfree(in);
697 umem->id = MLX5_GET(create_umem_out, out, umem_id);
698
699 return 0;
700
701err_cmd:
702 kfree(in);
703err_in:
704 umem_frag_buf_free(ndev, umem);
705 return err;
706}
707
708static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
709{
710 u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
711 u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
712 struct mlx5_vdpa_umem *umem;
713
714 switch (num) {
715 case 1:
716 umem = &mvq->umem1;
717 break;
718 case 2:
719 umem = &mvq->umem2;
720 break;
721 case 3:
722 umem = &mvq->umem3;
723 break;
724 }
725
726 MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
727 MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
728 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
729 return;
730
731 umem_frag_buf_free(ndev, umem);
732}
733
734static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
735{
736 int num;
737 int err;
738
739 for (num = 1; num <= 3; num++) {
740 err = create_umem(ndev, mvq, num);
741 if (err)
742 goto err_umem;
743 }
744 return 0;
745
746err_umem:
747 for (num--; num > 0; num--)
748 umem_destroy(ndev, mvq, num);
749
750 return err;
751}
752
753static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
754{
755 int num;
756
757 for (num = 3; num > 0; num--)
758 umem_destroy(ndev, mvq, num);
759}
760
761static int get_queue_type(struct mlx5_vdpa_net *ndev)
762{
763 u32 type_mask;
764
765 type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
766
767 /* prefer split queue */
768 if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
769 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
770
771 WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
772
773 return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
774}
775
776static bool vq_is_tx(u16 idx)
777{
778 return idx % 2;
779}
780
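/* Pack the negotiated TSO/checksum feature bits into the layout expected by
 * the virtqueue object's queue_feature_bit_mask_12_3 field.
 */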
781static u16 get_features_12_3(u64 features)
782{
783 return (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << 9) |
784 (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << 8) |
785 (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << 7) |
786 (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_CSUM)) << 6);
787}
788
789static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
790{
791 return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
792 BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
793}
794
795static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
796{
797 int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
798 u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
799 void *obj_context;
800 void *cmd_hdr;
801 void *vq_ctx;
802 void *in;
803 int err;
804
805 err = umems_create(ndev, mvq);
806 if (err)
807 return err;
808
809 in = kzalloc(inlen, GFP_KERNEL);
810 if (!in) {
811 err = -ENOMEM;
812 goto err_alloc;
813 }
814
815 cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
816
817 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
818 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
819 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
820
821 obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
822 MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
823 MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
824 MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
825 get_features_12_3(ndev->mvdev.actual_features));
826 vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
827 MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
828
829 if (vq_is_tx(mvq->index))
830 MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
831
832 MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
833 MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
834 MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
835 MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
836 MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
837 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
838 MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
839 MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
840 MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
841 MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
842 MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
843 MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
844 MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
845 MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
846 MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
847 MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
848 MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
849 if (counters_supported(&ndev->mvdev))
850 MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
851
852 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
853 if (err)
854 goto err_cmd;
855
856 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
857 kfree(in);
858 mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
859
860 return 0;
861
862err_cmd:
863 kfree(in);
864err_alloc:
865 umems_destroy(ndev, mvq);
866 return err;
867}
868
869static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
870{
871 u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
872 u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
873
874 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
875 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
876 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
877 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
878 MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
879 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
880 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
881 mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
882 return;
883 }
884 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
885 umems_destroy(ndev, mvq);
886}
887
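/* Each virtqueue uses a pair of connected RC QPs. These helpers return the
 * QP number to operate on and its remote peer, depending on whether the
 * firmware or the driver side is being modified.
 */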
888static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
889{
890 return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
891}
892
893static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
894{
895 return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
896}
897
898static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
899 int *outlen, u32 qpn, u32 rqpn)
900{
901 void *qpc;
902 void *pp;
903
904 switch (cmd) {
905 case MLX5_CMD_OP_2RST_QP:
906 *inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
907 *outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
908 *in = kzalloc(*inlen, GFP_KERNEL);
909 *out = kzalloc(*outlen, GFP_KERNEL);
910 if (!*in || !*out)
911 goto outerr;
912
913 MLX5_SET(qp_2rst_in, *in, opcode, cmd);
914 MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
915 MLX5_SET(qp_2rst_in, *in, qpn, qpn);
916 break;
917 case MLX5_CMD_OP_RST2INIT_QP:
918 *inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
919 *outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
920 *in = kzalloc(*inlen, GFP_KERNEL);
921 *out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
922 if (!*in || !*out)
923 goto outerr;
924
925 MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
926 MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
927 MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
928 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
929 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
930 MLX5_SET(qpc, qpc, rwe, 1);
931 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
932 MLX5_SET(ads, pp, vhca_port_num, 1);
933 break;
934 case MLX5_CMD_OP_INIT2RTR_QP:
935 *inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
936 *outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
937 *in = kzalloc(*inlen, GFP_KERNEL);
938 *out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
939 if (!*in || !*out)
940 goto outerr;
941
942 MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
943 MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
944 MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
945 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
946 MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
947 MLX5_SET(qpc, qpc, log_msg_max, 30);
948 MLX5_SET(qpc, qpc, remote_qpn, rqpn);
949 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
950 MLX5_SET(ads, pp, fl, 1);
951 break;
952 case MLX5_CMD_OP_RTR2RTS_QP:
953 *inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
954 *outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
955 *in = kzalloc(*inlen, GFP_KERNEL);
956 *out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
957 if (!*in || !*out)
958 goto outerr;
959
960 MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
961 MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
962 MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
963 qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
964 pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
965 MLX5_SET(ads, pp, ack_timeout, 14);
966 MLX5_SET(qpc, qpc, retry_count, 7);
967 MLX5_SET(qpc, qpc, rnr_retry, 7);
968 break;
969 default:
970 goto outerr_nullify;
971 }
972
973 return;
974
975outerr:
976 kfree(*in);
977 kfree(*out);
978outerr_nullify:
979 *in = NULL;
980 *out = NULL;
981}
982
983static void free_inout(void *in, void *out)
984{
985 kfree(in);
986 kfree(out);
987}
988
989/* Two QPs are used by each virtqueue. One is used by the driver and one by
990 * firmware. The fw argument indicates whether the QP being modified is the one used
991 * by firmware.
992 */
993static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
994{
995 int outlen;
996 int inlen;
997 void *out;
998 void *in;
999 int err;
1000
1001 alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1002 if (!in || !out)
1003 return -ENOMEM;
1004
1005 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1006 free_inout(in, out);
1007 return err;
1008}
1009
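/* Walk both QPs of a virtqueue through the RC state machine
 * (RST -> INIT -> RTR, plus RTS for the firmware QP) so the two ends of the
 * notification channel are connected to each other.
 */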
1010static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1011{
1012 int err;
1013
1014 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1015 if (err)
1016 return err;
1017
1018 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1019 if (err)
1020 return err;
1021
1022 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1023 if (err)
1024 return err;
1025
1026 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1027 if (err)
1028 return err;
1029
1030 err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1031 if (err)
1032 return err;
1033
1034 err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1035 if (err)
1036 return err;
1037
1038 return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1039}
1040
1041struct mlx5_virtq_attr {
1042 u8 state;
1043 u16 available_index;
1044 u16 used_index;
1045};
1046
1047static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1048 struct mlx5_virtq_attr *attr)
1049{
1050 int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1051 u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1052 void *out;
1053 void *obj_context;
1054 void *cmd_hdr;
1055 int err;
1056
1057 out = kzalloc(outlen, GFP_KERNEL);
1058 if (!out)
1059 return -ENOMEM;
1060
1061 cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1062
1063 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1064 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1065 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1066 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1067 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1068 if (err)
1069 goto err_cmd;
1070
1071 obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1072 memset(attr, 0, sizeof(*attr));
1073 attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1074 attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1075 attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1076 kfree(out);
1077 return 0;
1078
1079err_cmd:
1080 kfree(out);
1081 return err;
1082}
1083
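/* Only the INIT -> RDY and RDY -> SUSPEND transitions are accepted; once a
 * virtqueue object is in SUSPEND or ERR it cannot be moved to another state
 * here.
 */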
1084static bool is_valid_state_change(int oldstate, int newstate)
1085{
1086 switch (oldstate) {
1087 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1088 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1089 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1090 return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1091 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1092 case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1093 default:
1094 return false;
1095 }
1096}
1097
1098static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1099{
1100 int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1101 u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1102 void *obj_context;
1103 void *cmd_hdr;
1104 void *in;
1105 int err;
1106
1107 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1108 return 0;
1109
1110 if (!is_valid_state_change(mvq->fw_state, state))
1111 return -EINVAL;
1112
1113 in = kzalloc(inlen, GFP_KERNEL);
1114 if (!in)
1115 return -ENOMEM;
1116
1117 cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1118
1119 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1120 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1121 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1122 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1123
1124 obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1125 MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1126 MLX5_VIRTQ_MODIFY_MASK_STATE);
1127 MLX5_SET(virtio_net_q_object, obj_context, state, state);
1128 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1129 kfree(in);
1130 if (!err)
1131 mvq->fw_state = state;
1132
1133 return err;
1134}
1135
1136static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1137{
1138 u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1139 u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1140 void *cmd_hdr;
1141 int err;
1142
1143 if (!counters_supported(&ndev->mvdev))
1144 return 0;
1145
1146 cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1147
1148 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1149 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1150 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1151
1152 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1153 if (err)
1154 return err;
1155
1156 mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1157
1158 return 0;
1159}
1160
1161static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1162{
1163 u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1164 u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1165
1166 if (!counters_supported(&ndev->mvdev))
1167 return;
1168
1169 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1170 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1171 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1172 MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1173 if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1174 mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1175}
1176
1177static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1178{
1179 u16 idx = mvq->index;
1180 int err;
1181
1182 if (!mvq->num_ent)
1183 return 0;
1184
1185 if (mvq->initialized)
1186 return 0;
1187
1188 err = cq_create(ndev, idx, mvq->num_ent);
1189 if (err)
1190 return err;
1191
1192 err = qp_create(ndev, mvq, &mvq->fwqp);
1193 if (err)
1194 goto err_fwqp;
1195
1196 err = qp_create(ndev, mvq, &mvq->vqqp);
1197 if (err)
1198 goto err_vqqp;
1199
1200 err = connect_qps(ndev, mvq);
1201 if (err)
1202 goto err_connect;
1203
1204 err = counter_set_alloc(ndev, mvq);
1205 if (err)
1206 goto err_counter;
1207
1208 err = create_virtqueue(ndev, mvq);
1209 if (err)
1210 goto err_connect;
1211
1212 if (mvq->ready) {
1213 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1214 if (err) {
1215 mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1216 idx, err);
1217 goto err_connect;
1218 }
1219 }
1220
1221 mvq->initialized = true;
1222 return 0;
1223
1224err_connect:
1225 counter_set_dealloc(ndev, mvq);
1226err_counter:
1227 qp_destroy(ndev, &mvq->vqqp);
1228err_vqqp:
1229 qp_destroy(ndev, &mvq->fwqp);
1230err_fwqp:
1231 cq_destroy(ndev, idx);
1232 return err;
1233}
1234
1235static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1236{
1237 struct mlx5_virtq_attr attr;
1238
1239 if (!mvq->initialized)
1240 return;
1241
1242 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1243 return;
1244
1245 if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1246 mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1247
1248 if (query_virtqueue(ndev, mvq, &attr)) {
1249 mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1250 return;
1251 }
1252 mvq->avail_idx = attr.available_index;
1253 mvq->used_idx = attr.used_index;
1254}
1255
1256static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1257{
1258 int i;
1259
1260 for (i = 0; i < ndev->mvdev.max_vqs; i++)
1261 suspend_vq(ndev, &ndev->vqs[i]);
1262}
1263
1264static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1265{
1266 if (!mvq->initialized)
1267 return;
1268
1269 suspend_vq(ndev, mvq);
1270 destroy_virtqueue(ndev, mvq);
1271 counter_set_dealloc(ndev, mvq);
1272 qp_destroy(ndev, &mvq->vqqp);
1273 qp_destroy(ndev, &mvq->fwqp);
1274 cq_destroy(ndev, mvq->index);
1275 mvq->initialized = false;
1276}
1277
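/* The RQT is sized to the next power of two of rqt_size and initially lists
 * the currently active receive (even-indexed) virtqueues; the TIR created
 * later spreads incoming packets across these entries.
 */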
1278static int create_rqt(struct mlx5_vdpa_net *ndev)
1279{
1280 int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1281 int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1282 __be32 *list;
1283 void *rqtc;
1284 int inlen;
1285 void *in;
1286 int i, j;
1287 int err;
1288
1289 inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1290 in = kzalloc(inlen, GFP_KERNEL);
1291 if (!in)
1292 return -ENOMEM;
1293
1294 MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1295 rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1296
1297 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1298 MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1299 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1300 for (i = 0, j = 0; i < act_sz; i++, j += 2)
1301 list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1302
1303 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1304 err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1305 kfree(in);
1306 if (err)
1307 return err;
1308
1309 return 0;
1310}
1311
1312#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1313
1314static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1315{
1316 int act_sz = roundup_pow_of_two(num / 2);
1317 __be32 *list;
1318 void *rqtc;
1319 int inlen;
1320 void *in;
1321 int i, j;
1322 int err;
1323
1324 inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1325 in = kzalloc(inlen, GFP_KERNEL);
1326 if (!in)
1327 return -ENOMEM;
1328
1329 MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1330 MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1331 rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1332 MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1333
1334 list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1335 for (i = 0, j = 0; i < act_sz; i++, j = j + 2)
1336 list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1337
1338 MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1339 err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1340 kfree(in);
1341 if (err)
1342 return err;
1343
1344 return 0;
1345}
1346
1347static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1348{
1349 mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1350}
1351
1352static int create_tir(struct mlx5_vdpa_net *ndev)
1353{
1354#define HASH_IP_L4PORTS \
1355 (MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
1356 MLX5_HASH_FIELD_SEL_L4_DPORT)
1357 static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1358 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1359 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1360 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1361 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1362 void *rss_key;
1363 void *outer;
1364 void *tirc;
1365 void *in;
1366 int err;
1367
1368 in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1369 if (!in)
1370 return -ENOMEM;
1371
1372 MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1373 tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1374 MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1375
1376 MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1377 MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1378 rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1379 memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1380
1381 outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1382 MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1383 MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1384 MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1385
1386 MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1387 MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1388
1389 err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1390 kfree(in);
1391 if (err)
1392 return err;
1393
1394 mlx5_vdpa_add_tirn(ndev);
1395 return err;
1396}
1397
1398static void destroy_tir(struct mlx5_vdpa_net *ndev)
1399{
1400 mlx5_vdpa_remove_tirn(ndev);
1401 mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1402}
1403
1404#define MAX_STEERING_ENT 0x8000
1405#define MAX_STEERING_GROUPS 2
1406
1407#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1408 #define NUM_DESTS 2
1409#else
1410 #define NUM_DESTS 1
1411#endif
1412
1413static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1414 struct macvlan_node *node,
1415 struct mlx5_flow_act *flow_act,
1416 struct mlx5_flow_destination *dests)
1417{
1418#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1419 int err;
1420
1421 node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1422 if (IS_ERR(node->ucast_counter.counter))
1423 return PTR_ERR(node->ucast_counter.counter);
1424
1425 node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1426 if (IS_ERR(node->mcast_counter.counter)) {
1427 err = PTR_ERR(node->mcast_counter.counter);
1428 goto err_mcast_counter;
1429 }
1430
1431 dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1432 flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1433 return 0;
1434
1435err_mcast_counter:
1436 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1437 return err;
1438#else
1439 return 0;
1440#endif
1441}
1442
1443static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1444 struct macvlan_node *node)
1445{
1446#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1447 mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1448 mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1449#endif
1450}
1451
1452static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1453 struct macvlan_node *node)
1454{
1455 struct mlx5_flow_destination dests[NUM_DESTS] = {};
1456 struct mlx5_flow_act flow_act = {};
1457 struct mlx5_flow_spec *spec;
1458 void *headers_c;
1459 void *headers_v;
1460 u8 *dmac_c;
1461 u8 *dmac_v;
1462 int err;
1463 u16 vid;
1464
1465 spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1466 if (!spec)
1467 return -ENOMEM;
1468
1469 vid = key2vid(node->macvlan);
1470 spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1471 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1472 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1473 dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1474 dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1475 eth_broadcast_addr(dmac_c);
1476 ether_addr_copy(dmac_v, mac);
1477 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1478 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1479 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1480 }
1481 if (node->tagged) {
1482 MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1483 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1484 }
1485 flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1486 dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1487 dests[0].tir_num = ndev->res.tirn;
1488 err = add_steering_counters(ndev, node, &flow_act, dests);
1489 if (err)
1490 goto out_free;
1491
1492#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1493 dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1494#endif
1495 node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1496 if (IS_ERR(node->ucast_rule)) {
1497 err = PTR_ERR(node->ucast_rule);
1498 goto err_ucast;
1499 }
1500
1501#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1502 dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1503#endif
1504
1505 memset(dmac_c, 0, ETH_ALEN);
1506 memset(dmac_v, 0, ETH_ALEN);
1507 dmac_c[0] = 1;
1508 dmac_v[0] = 1;
1509 node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1510 if (IS_ERR(node->mcast_rule)) {
1511 err = PTR_ERR(node->mcast_rule);
1512 goto err_mcast;
1513 }
1514 kvfree(spec);
1515 mlx5_vdpa_add_rx_counters(ndev, node);
1516 return 0;
1517
1518err_mcast:
1519 mlx5_del_flow_rules(node->ucast_rule);
1520err_ucast:
1521 remove_steering_counters(ndev, node);
1522out_free:
1523 kvfree(spec);
1524 return err;
1525}
1526
1527static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1528 struct macvlan_node *node)
1529{
1530 mlx5_vdpa_remove_rx_counters(ndev, node);
1531 mlx5_del_flow_rules(node->ucast_rule);
1532 mlx5_del_flow_rules(node->mcast_rule);
1533}
1534
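/* Build the 64-bit key used by the MAC/VLAN hash table: the VLAN id (or
 * MLX5V_UNTAGGED for untagged traffic) occupies bits 48..63 and the MAC
 * address the low 48 bits.
 */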
1535static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1536{
1537 u64 val;
1538
1539 if (!tagged)
1540 vlan = MLX5V_UNTAGGED;
1541
1542 val = (u64)vlan << 48 |
1543 (u64)mac[0] << 40 |
1544 (u64)mac[1] << 32 |
1545 (u64)mac[2] << 24 |
1546 (u64)mac[3] << 16 |
1547 (u64)mac[4] << 8 |
1548 (u64)mac[5];
1549
1550 return val;
1551}
1552
1553static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1554{
1555 struct macvlan_node *pos;
1556 u32 idx;
1557
1558 idx = hash_64(value, 8); // tbd 8
1559 hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1560 if (pos->macvlan == value)
1561 return pos;
1562 }
1563 return NULL;
1564}
1565
1566static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1567{
1568 struct macvlan_node *ptr;
1569 u64 val;
1570 u32 idx;
1571 int err;
1572
1573 val = search_val(mac, vid, tagged);
1574 if (mac_vlan_lookup(ndev, val))
1575 return -EEXIST;
1576
1577 ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1578 if (!ptr)
1579 return -ENOMEM;
1580
1581 ptr->tagged = tagged;
1582 ptr->macvlan = val;
1583 ptr->ndev = ndev;
1584 err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1585 if (err)
1586 goto err_add;
1587
1588 idx = hash_64(val, 8);
1589 hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1590 return 0;
1591
1592err_add:
1593 kfree(ptr);
1594 return err;
1595}
1596
1597static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1598{
1599 struct macvlan_node *ptr;
1600
1601 ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1602 if (!ptr)
1603 return;
1604
1605 hlist_del(&ptr->hlist);
1606 mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1607 remove_steering_counters(ndev, ptr);
1608 kfree(ptr);
1609}
1610
1611static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1612{
1613 struct macvlan_node *pos;
1614 struct hlist_node *n;
1615 int i;
1616
1617 for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1618 hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1619 hlist_del(&pos->hlist);
1620 mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1621 remove_steering_counters(ndev, pos);
1622 kfree(pos);
1623 }
1624 }
1625}
1626
1627static int setup_steering(struct mlx5_vdpa_net *ndev)
1628{
1629 struct mlx5_flow_table_attr ft_attr = {};
1630 struct mlx5_flow_namespace *ns;
1631 int err;
1632
1633 ft_attr.max_fte = MAX_STEERING_ENT;
1634 ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1635
1636 ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1637 if (!ns) {
1638 mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1639 return -EOPNOTSUPP;
1640 }
1641
1642 ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1643 if (IS_ERR(ndev->rxft)) {
1644 mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1645 return PTR_ERR(ndev->rxft);
1646 }
1647 mlx5_vdpa_add_rx_flow_table(ndev);
1648
1649 err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1650 if (err)
1651 goto err_add;
1652
1653 return 0;
1654
1655err_add:
1656 mlx5_vdpa_remove_rx_flow_table(ndev);
1657 mlx5_destroy_flow_table(ndev->rxft);
1658 return err;
1659}
1660
1661static void teardown_steering(struct mlx5_vdpa_net *ndev)
1662{
1663 clear_mac_vlan_table(ndev);
1664 mlx5_vdpa_remove_rx_flow_table(ndev);
1665 mlx5_destroy_flow_table(ndev->rxft);
1666}
1667
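/* Handle VIRTIO_NET_CTRL_MAC_ADDR_SET: move the MPFS entry to the new
 * address and recreate the steering rules, rolling back to the old MAC if
 * the new rules cannot be installed.
 */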
1668static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1669{
1670 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1671 struct mlx5_control_vq *cvq = &mvdev->cvq;
1672 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1673 struct mlx5_core_dev *pfmdev;
1674 size_t read;
1675 u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1676
1677 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1678 switch (cmd) {
1679 case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1680 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1681 if (read != ETH_ALEN)
1682 break;
1683
1684 if (!memcmp(ndev->config.mac, mac, 6)) {
1685 status = VIRTIO_NET_OK;
1686 break;
1687 }
1688
1688
1689 if (is_zero_ether_addr(mac))
1690 break;
1691
1692 if (!is_zero_ether_addr(ndev->config.mac)) {
1693 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1694 mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1695 ndev->config.mac);
1696 break;
1697 }
1698 }
1699
1700 if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1701 mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1702 mac);
1703 break;
1704 }
1705
1706 /* back up the original mac address so that if we fail to add the forward rules
1707 * we can restore it
1708 */
1709 memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1710
1711 memcpy(ndev->config.mac, mac, ETH_ALEN);
1712
1713 /* Need to recreate the flow table entry, so that the packet can be forwarded back
1714 */
1715 mac_vlan_del(ndev, mac_back, 0, false);
1716
1717 if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1718 mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1719
1720 /* Although this should hardly ever happen, we still need to double check */
1721 if (is_zero_ether_addr(mac_back)) {
1722 mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1723 break;
1724 }
1725
1726 /* Try to restore original mac address to MPFS table, and try to restore
1727 * the forward rule entry.
1728 */
1729 if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1730 mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1731 ndev->config.mac);
1732 }
1733
1734 if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1735 mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1736 mac_back);
1737 }
1738
1739 memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1740
baf2ad3f 1741 if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
f1781bed
MQ
1742 mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1743
1744 break;
1745 }
1746
5262912e
EC
1747 status = VIRTIO_NET_OK;
1748 break;
1749
1750 default:
1751 break;
1752 }
1753
1754 return status;
1755}
1756
52893733
EC
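/* Change the number of active data virtqueue pairs to newqps: either shrink
 * the RQT and tear down the extra virtqueues, or set up new virtqueues and
 * grow the RQT. On failure the previous virtqueue count is restored.
 */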
1757static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1758{
1759 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1760 int cur_qps = ndev->cur_num_vqs / 2;
1761 int err;
1762 int i;
1763
1764 if (cur_qps > newqps) {
1765 err = modify_rqt(ndev, 2 * newqps);
1766 if (err)
1767 return err;
1768
1769 for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1770 teardown_vq(ndev, &ndev->vqs[i]);
1771
1772 ndev->cur_num_vqs = 2 * newqps;
1773 } else {
1774 ndev->cur_num_vqs = 2 * newqps;
1775 for (i = cur_qps * 2; i < 2 * newqps; i++) {
1776 err = setup_vq(ndev, &ndev->vqs[i]);
1777 if (err)
1778 goto clean_added;
1779 }
1780 err = modify_rqt(ndev, 2 * newqps);
1781 if (err)
1782 goto clean_added;
1783 }
1784 return 0;
1785
1786clean_added:
37e07e70 1787 for (--i; i >= 2 * cur_qps; --i)
52893733
EC
1788 teardown_vq(ndev, &ndev->vqs[i]);
1789
37e07e70
EC
1790 ndev->cur_num_vqs = 2 * cur_qps;
1791
52893733
EC
1792 return err;
1793}
1794
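/* Handle VIRTIO_NET_CTRL_MQ commands: validate the requested number of
 * virtqueue pairs against the negotiated features and rqt_size, then resize
 * the data path through change_num_qps().
 */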
1795static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1796{
1797 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1798 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1799 struct mlx5_control_vq *cvq = &mvdev->cvq;
1800 struct virtio_net_ctrl_mq mq;
1801 size_t read;
1802 u16 newqps;
1803
1804 switch (cmd) {
1805 case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
ed0f849f
SWL
 1806 /* This mq feature check aligns with the pre-existing userspace
 1807 * implementation.
 1808 *
 1809 * Without it, an untrusted driver could fake a multiqueue config
 1810 * request down to a non-mq device, which may cause the kernel to
 1811 * panic due to uninitialized resources for the extra vqs. Even with
 1812 * a well behaved guest driver, changing the number of vqs on a
 1813 * non-mq device is not expected to be allowed.
1814 */
1815 if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1816 break;
1817
52893733
EC
1818 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1819 if (read != sizeof(mq))
1820 break;
1821
1822 newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
ed0f849f 1823 if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
acde3929 1824 newqps > ndev->rqt_size)
ed0f849f
SWL
1825 break;
1826
52893733
EC
1827 if (ndev->cur_num_vqs == 2 * newqps) {
1828 status = VIRTIO_NET_OK;
1829 break;
1830 }
1831
52893733
EC
1832 if (!change_num_qps(mvdev, newqps))
1833 status = VIRTIO_NET_OK;
1834
1835 break;
1836 default:
1837 break;
1838 }
1839
1840 return status;
1841}
1842
baf2ad3f
EC
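/* Handle VIRTIO_NET_CTRL_VLAN commands: add or remove the MAC/VLAN steering
 * rule for the VLAN id carried in the control message.
 */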
1843static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1844{
1845 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1846 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1847 struct mlx5_control_vq *cvq = &mvdev->cvq;
1848 __virtio16 vlan;
1849 size_t read;
1850 u16 id;
1851
5aec8049
EC
1852 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
1853 return status;
1854
baf2ad3f
EC
1855 switch (cmd) {
1856 case VIRTIO_NET_CTRL_VLAN_ADD:
1857 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1858 if (read != sizeof(vlan))
1859 break;
1860
1861 id = mlx5vdpa16_to_cpu(mvdev, vlan);
1862 if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1863 break;
1864
1865 status = VIRTIO_NET_OK;
1866 break;
1867 case VIRTIO_NET_CTRL_VLAN_DEL:
1868 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1869 if (read != sizeof(vlan))
1870 break;
1871
1872 id = mlx5vdpa16_to_cpu(mvdev, vlan);
1873 mac_vlan_del(ndev, ndev->config.mac, id, true);
f766c409 1874 status = VIRTIO_NET_OK;
baf2ad3f
EC
1875 break;
1876 default:
f38b3c6a
DC
1877 break;
1878 }
baf2ad3f 1879
f38b3c6a 1880 return status;
baf2ad3f
EC
1881}
1882
5262912e
EC
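/* Work handler for control VQ kicks: pop one control descriptor, dispatch it
 * to the class specific handler, write the status back to the guest, and
 * requeue itself to process any remaining descriptors.
 */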
1883static void mlx5_cvq_kick_handler(struct work_struct *work)
1884{
1885 virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1886 struct virtio_net_ctrl_hdr ctrl;
218bdd20 1887 struct mlx5_vdpa_wq_ent *wqent;
5262912e
EC
1888 struct mlx5_vdpa_dev *mvdev;
1889 struct mlx5_control_vq *cvq;
1890 struct mlx5_vdpa_net *ndev;
1891 size_t read, write;
1892 int err;
1893
218bdd20 1894 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
5262912e
EC
1895 mvdev = wqent->mvdev;
1896 ndev = to_mlx5_vdpa_ndev(mvdev);
1897 cvq = &mvdev->cvq;
1c80cf03 1898
759ae7f9 1899 down_write(&ndev->reslock);
1c80cf03
JW
1900
1901 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
1902 goto out;
1903
5262912e 1904 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
1c80cf03 1905 goto out;
5262912e
EC
1906
1907 if (!cvq->ready)
1c80cf03 1908 goto out;
5262912e
EC
1909
1910 while (true) {
1911 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
1912 GFP_ATOMIC);
1913 if (err <= 0)
1914 break;
1915
1916 read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
1917 if (read != sizeof(ctrl))
1918 break;
1919
1892a3d4 1920 cvq->received_desc++;
5262912e
EC
1921 switch (ctrl.class) {
1922 case VIRTIO_NET_CTRL_MAC:
1923 status = handle_ctrl_mac(mvdev, ctrl.cmd);
1924 break;
52893733
EC
1925 case VIRTIO_NET_CTRL_MQ:
1926 status = handle_ctrl_mq(mvdev, ctrl.cmd);
1927 break;
baf2ad3f
EC
1928 case VIRTIO_NET_CTRL_VLAN:
1929 status = handle_ctrl_vlan(mvdev, ctrl.cmd);
1930 break;
5262912e
EC
1931 default:
1932 break;
1933 }
1934
1935 /* Make sure data is written before advancing index */
1936 smp_wmb();
1937
1938 write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
1939 vringh_complete_iotlb(&cvq->vring, cvq->head, write);
1940 vringh_kiov_cleanup(&cvq->riov);
1941 vringh_kiov_cleanup(&cvq->wiov);
1942
1943 if (vringh_need_notify_iotlb(&cvq->vring))
1944 vringh_notify(&cvq->vring);
55ebf0d6 1945
1892a3d4 1946 cvq->completed_desc++;
55ebf0d6
JW
1947 queue_work(mvdev->wq, &wqent->work);
1948 break;
5262912e 1949 }
1c80cf03
JW
1950
1951out:
759ae7f9 1952 up_write(&ndev->reslock);
5262912e
EC
1953}
1954
1a86b377
EC
1955static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
1956{
1957 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1958 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
5262912e 1959 struct mlx5_vdpa_virtqueue *mvq;
1a86b377 1960
e4fc6650
EC
1961 if (!is_index_valid(mvdev, idx))
1962 return;
1963
5262912e 1964 if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
ad6dc1da 1965 if (!mvdev->wq || !mvdev->cvq.ready)
5262912e
EC
1966 return;
1967
55ebf0d6 1968 queue_work(mvdev->wq, &ndev->cvq_ent.work);
5262912e
EC
1969 return;
1970 }
1971
1972 mvq = &ndev->vqs[idx];
1a86b377
EC
1973 if (unlikely(!mvq->ready))
1974 return;
1975
1976 iowrite16(idx, ndev->mvdev.res.kick_addr);
1977}
1978
1979static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
1980 u64 driver_area, u64 device_area)
1981{
1982 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1983 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
5262912e 1984 struct mlx5_vdpa_virtqueue *mvq;
1a86b377 1985
e4fc6650
EC
1986 if (!is_index_valid(mvdev, idx))
1987 return -EINVAL;
1a86b377 1988
5262912e
EC
1989 if (is_ctrl_vq_idx(mvdev, idx)) {
1990 mvdev->cvq.desc_addr = desc_area;
1991 mvdev->cvq.device_addr = device_area;
1992 mvdev->cvq.driver_addr = driver_area;
1993 return 0;
1994 }
1995
1996 mvq = &ndev->vqs[idx];
1a86b377
EC
1997 mvq->desc_addr = desc_area;
1998 mvq->device_addr = device_area;
1999 mvq->driver_addr = driver_area;
2000 return 0;
2001}
2002
2003static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2004{
2005 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2006 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2007 struct mlx5_vdpa_virtqueue *mvq;
2008
5262912e 2009 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
e4fc6650
EC
2010 return;
2011
1a86b377
EC
2012 mvq = &ndev->vqs[idx];
2013 mvq->num_ent = num;
2014}
2015
2016static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2017{
2018 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2019 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1a86b377 2020
db296d25 2021 ndev->event_cbs[idx] = *cb;
40f2f3e9
EC
2022 if (is_ctrl_vq_idx(mvdev, idx))
2023 mvdev->cvq.event_cb = *cb;
1a86b377
EC
2024}
2025
5262912e
EC
2026static void mlx5_cvq_notify(struct vringh *vring)
2027{
2028 struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2029
2030 if (!cvq->event_cb.callback)
2031 return;
2032
2033 cvq->event_cb.callback(cvq->event_cb.private);
2034}
2035
2036static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2037{
2038 struct mlx5_control_vq *cvq = &mvdev->cvq;
2039
2040 cvq->ready = ready;
2041 if (!ready)
2042 return;
2043
2044 cvq->vring.notify = mlx5_cvq_notify;
1a86b377
EC
2045}
2046
2047static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2048{
2049 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2050 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
5262912e 2051 struct mlx5_vdpa_virtqueue *mvq;
cae15c2e 2052 int err;
1a86b377 2053
759be899
EC
2054 if (!mvdev->actual_features)
2055 return;
2056
e4fc6650
EC
2057 if (!is_index_valid(mvdev, idx))
2058 return;
2059
5262912e
EC
2060 if (is_ctrl_vq_idx(mvdev, idx)) {
2061 set_cvq_ready(mvdev, ready);
2062 return;
2063 }
1a86b377 2064
5262912e 2065 mvq = &ndev->vqs[idx];
cae15c2e 2066 if (!ready) {
1a86b377 2067 suspend_vq(ndev, mvq);
cae15c2e
EC
2068 } else {
2069 err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2070 if (err) {
2071 mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2072 ready = false;
2073 }
2074 }
2075
1a86b377
EC
2076
2077 mvq->ready = ready;
2078}
2079
2080static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2081{
2082 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2083 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1a86b377 2084
e4fc6650
EC
2085 if (!is_index_valid(mvdev, idx))
2086 return false;
2087
5262912e
EC
2088 if (is_ctrl_vq_idx(mvdev, idx))
2089 return mvdev->cvq.ready;
2090
2091 return ndev->vqs[idx].ready;
1a86b377
EC
2092}
2093
2094static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2095 const struct vdpa_vq_state *state)
2096{
2097 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2098 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
5262912e 2099 struct mlx5_vdpa_virtqueue *mvq;
1a86b377 2100
e4fc6650
EC
2101 if (!is_index_valid(mvdev, idx))
2102 return -EINVAL;
2103
5262912e
EC
2104 if (is_ctrl_vq_idx(mvdev, idx)) {
2105 mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2106 return 0;
2107 }
2108
2109 mvq = &ndev->vqs[idx];
1a86b377
EC
2110 if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2111 mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2112 return -EINVAL;
2113 }
2114
530a5678
JW
2115 mvq->used_idx = state->split.avail_index;
2116 mvq->avail_idx = state->split.avail_index;
1a86b377
EC
2117 return 0;
2118}
2119
2120static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2121{
2122 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2123 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
5262912e 2124 struct mlx5_vdpa_virtqueue *mvq;
1a86b377
EC
2125 struct mlx5_virtq_attr attr;
2126 int err;
2127
e4fc6650
EC
2128 if (!is_index_valid(mvdev, idx))
2129 return -EINVAL;
2130
5262912e
EC
2131 if (is_ctrl_vq_idx(mvdev, idx)) {
2132 state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2133 return 0;
2134 }
2135
2136 mvq = &ndev->vqs[idx];
3176e974
SWL
2137 /* If the virtq object was destroyed, use the value saved at
2138 * the last minute of suspend_vq. This caters for userspace
2139 * that cares about emulating the index after vq is stopped.
2140 */
2141 if (!mvq->initialized) {
bc04d93e
EC
2142 /* Firmware returns a wrong value for the available index.
2143 * Since both values should be identical, we take the value of
2144 * used_idx which is reported correctly.
2145 */
530a5678 2146 state->split.avail_index = mvq->used_idx;
3176e974
SWL
2147 return 0;
2148 }
1a86b377
EC
2149
2150 err = query_virtqueue(ndev, mvq, &attr);
2151 if (err) {
2152 mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2153 return err;
2154 }
530a5678 2155 state->split.avail_index = attr.used_index;
1a86b377
EC
2156 return 0;
2157}
2158
2159static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2160{
2161 return PAGE_SIZE;
2162}
2163
8fcd20c3 2164static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
d4821902 2165{
8fcd20c3
EC
2166 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2167
2168 if (is_ctrl_vq_idx(mvdev, idx))
2169 return MLX5_VDPA_CVQ_GROUP;
2170
2171 return MLX5_VDPA_DATAVQ_GROUP;
d4821902
GD
2172}
2173
1a86b377
EC
2174enum { MLX5_VIRTIO_NET_F_GUEST_CSUM = 1 << 9,
2175 MLX5_VIRTIO_NET_F_CSUM = 1 << 10,
2176 MLX5_VIRTIO_NET_F_HOST_TSO6 = 1 << 11,
2177 MLX5_VIRTIO_NET_F_HOST_TSO4 = 1 << 12,
2178};
2179
2180static u64 mlx_to_vritio_features(u16 dev_features)
2181{
2182 u64 result = 0;
2183
2184 if (dev_features & MLX5_VIRTIO_NET_F_GUEST_CSUM)
cbb52359 2185 result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
1a86b377 2186 if (dev_features & MLX5_VIRTIO_NET_F_CSUM)
cbb52359 2187 result |= BIT_ULL(VIRTIO_NET_F_CSUM);
1a86b377 2188 if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO6)
cbb52359 2189 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
1a86b377 2190 if (dev_features & MLX5_VIRTIO_NET_F_HOST_TSO4)
cbb52359 2191 result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
1a86b377
EC
2192
2193 return result;
2194}
2195
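/* Build the feature set advertised by this device: the bits reported by the
 * firmware virtio emulation capabilities plus the features implemented by
 * this driver (control VQ, MQ, MAC, VLAN filtering, link status, MTU).
 */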
79de65ed 2196static u64 get_supported_features(struct mlx5_core_dev *mdev)
1a86b377 2197{
79de65ed 2198 u64 mlx_vdpa_features = 0;
1a86b377
EC
2199 u16 dev_features;
2200
79de65ed
EC
2201 dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2202 mlx_vdpa_features |= mlx_to_vritio_features(dev_features);
2203 if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2204 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2205 mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2206 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2207 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2208 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2209 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2210 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
baf2ad3f 2211 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
deeacf35 2212 mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
79de65ed
EC
2213
2214 return mlx_vdpa_features;
2215}
2216
a64917bc 2217static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
1a86b377
EC
2218{
2219 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2220 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
5262912e 2221
1a86b377
EC
2222 print_features(mvdev, ndev->mvdev.mlx_features, false);
2223 return ndev->mvdev.mlx_features;
2224}
2225
30c22f38 2226static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
1a86b377 2227{
30c22f38 2228 /* Minimum features to expect */
cbb52359 2229 if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
1a86b377
EC
2230 return -EOPNOTSUPP;
2231
30c22f38
SWL
 2232 /* Double check the feature combination sent down by the driver.
 2233 * Fail invalid feature sets where a dependent feature is missing.
 2234 *
 2235 * Per the VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
 2236 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
 2237 * By failing invalid features sent down by untrusted drivers, we
 2238 * ensure the assumptions made in is_index_valid() and
 2239 * is_ctrl_vq_idx() are not compromised.
2240 */
2241 if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2242 BIT_ULL(VIRTIO_NET_F_MQ))
2243 return -EINVAL;
2244
1a86b377
EC
2245 return 0;
2246}
2247
ae0428de 2248static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
1a86b377 2249{
ae0428de 2250 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1a86b377
EC
2251 int err;
2252 int i;
2253
acde3929 2254 for (i = 0; i < mvdev->max_vqs; i++) {
1a86b377
EC
2255 err = setup_vq(ndev, &ndev->vqs[i]);
2256 if (err)
2257 goto err_vq;
2258 }
2259
2260 return 0;
2261
2262err_vq:
2263 for (--i; i >= 0; i--)
2264 teardown_vq(ndev, &ndev->vqs[i]);
2265
2266 return err;
2267}
2268
2269static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2270{
2271 struct mlx5_vdpa_virtqueue *mvq;
2272 int i;
2273
2274 for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2275 mvq = &ndev->vqs[i];
2276 if (!mvq->initialized)
2277 continue;
2278
2279 teardown_vq(ndev, mvq);
2280 }
2281}
2282
e4fc6650 2283static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
36bdcf31 2284{
e4fc6650
EC
2285 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2286 if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2287 /* MQ supported. CVQ index is right above the last data virtqueue's */
2288 mvdev->max_idx = mvdev->max_vqs;
2289 } else {
 2290 /* Only CVQ supported. Data virtqueues occupy indices 0 and 1.
2291 * CVQ gets index 2
2292 */
2293 mvdev->max_idx = 2;
2294 }
2295 } else {
2296 /* Two data virtqueues only: one for rx and one for tx */
2297 mvdev->max_idx = 1;
2298 }
36bdcf31
EC
2299}
2300
a64917bc 2301static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
1a86b377
EC
2302{
2303 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2304 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2305 int err;
2306
2307 print_features(mvdev, features, true);
2308
30c22f38 2309 err = verify_driver_features(mvdev, features);
1a86b377
EC
2310 if (err)
2311 return err;
2312
2313 ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
b03fc43e 2314 if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
acde3929 2315 ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
b03fc43e 2316 else
acde3929
EC
2317 ndev->rqt_size = 1;
2318
2319 ndev->cur_num_vqs = 2 * ndev->rqt_size;
b03fc43e 2320
e4fc6650 2321 update_cvq_info(mvdev);
1a86b377
EC
2322 return err;
2323}
2324
2325static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2326{
edf747af
EC
2327 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2328 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2329
2330 ndev->config_cb = *cb;
1a86b377
EC
2331}
2332
2333#define MLX5_VDPA_MAX_VQ_ENTRIES 256
2334static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2335{
2336 return MLX5_VDPA_MAX_VQ_ENTRIES;
2337}
2338
2339static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2340{
2341 return VIRTIO_ID_NET;
2342}
2343
2344static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2345{
2346 return PCI_VENDOR_ID_MELLANOX;
2347}
2348
2349static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2350{
2351 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2352 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2353
2354 print_status(mvdev, ndev->mvdev.status, false);
2355 return ndev->mvdev.status;
2356}
2357
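/* Save the state of a virtqueue (indices, addresses, size, readiness) so it
 * can be recreated with the same state after a memory map change.
 */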
2358static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2359{
2360 struct mlx5_vq_restore_info *ri = &mvq->ri;
52893733 2361 struct mlx5_virtq_attr attr = {};
1a86b377
EC
2362 int err;
2363
52893733
EC
2364 if (mvq->initialized) {
2365 err = query_virtqueue(ndev, mvq, &attr);
2366 if (err)
2367 return err;
2368 }
1a86b377
EC
2369
2370 ri->avail_index = attr.available_index;
b35ccebe 2371 ri->used_index = attr.used_index;
1a86b377
EC
2372 ri->ready = mvq->ready;
2373 ri->num_ent = mvq->num_ent;
2374 ri->desc_addr = mvq->desc_addr;
2375 ri->device_addr = mvq->device_addr;
2376 ri->driver_addr = mvq->driver_addr;
1a86b377
EC
2377 ri->restore = true;
2378 return 0;
2379}
2380
2381static int save_channels_info(struct mlx5_vdpa_net *ndev)
2382{
2383 int i;
2384
2385 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2386 memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2387 save_channel_info(ndev, &ndev->vqs[i]);
2388 }
2389 return 0;
2390}
2391
2392static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2393{
2394 int i;
2395
2396 for (i = 0; i < ndev->mvdev.max_vqs; i++)
2397 memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2398}
2399
2400static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2401{
2402 struct mlx5_vdpa_virtqueue *mvq;
2403 struct mlx5_vq_restore_info *ri;
2404 int i;
2405
2406 mlx5_clear_vqs(ndev);
2407 init_mvqs(ndev);
2408 for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2409 mvq = &ndev->vqs[i];
2410 ri = &mvq->ri;
2411 if (!ri->restore)
2412 continue;
2413
2414 mvq->avail_idx = ri->avail_index;
b35ccebe 2415 mvq->used_idx = ri->used_index;
1a86b377
EC
2416 mvq->ready = ri->ready;
2417 mvq->num_ent = ri->num_ent;
2418 mvq->desc_addr = ri->desc_addr;
2419 mvq->device_addr = ri->device_addr;
2420 mvq->driver_addr = ri->driver_addr;
1a86b377
EC
2421 }
2422}
2423
38fc462f
EC
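/* Switch to a new memory mapping: suspend and save the virtqueues, tear down
 * the driver resources, recreate the MR from the new iotlb and, if the
 * device is running and not suspended, restore the virtqueues and the data
 * path.
 */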
2424static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2425 struct vhost_iotlb *iotlb, unsigned int asid)
1a86b377 2426{
ae0428de 2427 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1a86b377
EC
2428 int err;
2429
2430 suspend_vqs(ndev);
2431 err = save_channels_info(ndev);
2432 if (err)
2433 goto err_mr;
2434
2435 teardown_driver(ndev);
ae0428de 2436 mlx5_vdpa_destroy_mr(mvdev);
38fc462f 2437 err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
1a86b377
EC
2438 if (err)
2439 goto err_mr;
2440
09e65ee9 2441 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
1c80cf03 2442 goto err_mr;
1897f0b6 2443
1a86b377 2444 restore_channels_info(ndev);
ae0428de 2445 err = setup_driver(mvdev);
1a86b377
EC
2446 if (err)
2447 goto err_setup;
2448
2449 return 0;
2450
2451err_setup:
ae0428de 2452 mlx5_vdpa_destroy_mr(mvdev);
1a86b377
EC
2453err_mr:
2454 return err;
2455}
2456
1c80cf03 2457/* reslock must be held for this function */
ae0428de 2458static int setup_driver(struct mlx5_vdpa_dev *mvdev)
1a86b377 2459{
ae0428de 2460 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1a86b377
EC
2461 int err;
2462
759ae7f9 2463 WARN_ON(!rwsem_is_locked(&ndev->reslock));
1c80cf03 2464
1a86b377 2465 if (ndev->setup) {
ae0428de 2466 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
1a86b377
EC
2467 err = 0;
2468 goto out;
2469 }
f0417e72 2470 mlx5_vdpa_add_debugfs(ndev);
ae0428de 2471 err = setup_virtqueues(mvdev);
1a86b377 2472 if (err) {
ae0428de 2473 mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
f0417e72 2474 goto err_setup;
1a86b377
EC
2475 }
2476
2477 err = create_rqt(ndev);
2478 if (err) {
ae0428de 2479 mlx5_vdpa_warn(mvdev, "create_rqt\n");
1a86b377
EC
2480 goto err_rqt;
2481 }
2482
2483 err = create_tir(ndev);
2484 if (err) {
ae0428de 2485 mlx5_vdpa_warn(mvdev, "create_tir\n");
1a86b377
EC
2486 goto err_tir;
2487 }
2488
baf2ad3f 2489 err = setup_steering(ndev);
1a86b377 2490 if (err) {
baf2ad3f 2491 mlx5_vdpa_warn(mvdev, "setup_steering\n");
1a86b377
EC
2492 goto err_fwd;
2493 }
2494 ndev->setup = true;
1a86b377
EC
2495
2496 return 0;
2497
2498err_fwd:
2499 destroy_tir(ndev);
2500err_tir:
2501 destroy_rqt(ndev);
2502err_rqt:
2503 teardown_virtqueues(ndev);
f0417e72
EC
2504err_setup:
2505 mlx5_vdpa_remove_debugfs(ndev->debugfs);
1a86b377 2506out:
1a86b377
EC
2507 return err;
2508}
2509
1c80cf03 2510/* reslock must be held for this function */
1a86b377
EC
2511static void teardown_driver(struct mlx5_vdpa_net *ndev)
2512{
1c80cf03 2513
759ae7f9 2514 WARN_ON(!rwsem_is_locked(&ndev->reslock));
1c80cf03 2515
1a86b377 2516 if (!ndev->setup)
1c80cf03 2517 return;
1a86b377 2518
f0417e72
EC
2519 mlx5_vdpa_remove_debugfs(ndev->debugfs);
2520 ndev->debugfs = NULL;
baf2ad3f 2521 teardown_steering(ndev);
1a86b377
EC
2522 destroy_tir(ndev);
2523 destroy_rqt(ndev);
2524 teardown_virtqueues(ndev);
2525 ndev->setup = false;
1a86b377
EC
2526}
2527
e3aadf2e
EC
2528static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2529{
2530 int i;
2531
2532 for (i = 0; i < ndev->mvdev.max_vqs; i++)
2533 ndev->vqs[i].ready = false;
ef12e4bf
EC
2534
2535 ndev->mvdev.cvq.ready = false;
e3aadf2e
EC
2536}
2537
ace92524
EC
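/* Initialize the host-side vringh for the control VQ from the addresses
 * programmed by the driver, but only if VIRTIO_NET_F_CTRL_VQ was negotiated.
 */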
2538static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2539{
2540 struct mlx5_control_vq *cvq = &mvdev->cvq;
2541 int err = 0;
2542
2543 if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
2544 err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2545 MLX5_CVQ_MAX_ENT, false,
2546 (struct vring_desc *)(uintptr_t)cvq->desc_addr,
2547 (struct vring_avail *)(uintptr_t)cvq->driver_addr,
2548 (struct vring_used *)(uintptr_t)cvq->device_addr);
2549
2550 return err;
2551}
2552
1a86b377
EC
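/* Set the device status. On the transition to DRIVER_OK, set up the control
 * VQ vring and the whole data path; clearing DRIVER_OK without a reset is
 * treated as unexpected.
 */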
2553static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2554{
2555 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2556 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2557 int err;
2558
2559 print_status(mvdev, status, true);
1a86b377 2560
759ae7f9 2561 down_write(&ndev->reslock);
1c80cf03 2562
1a86b377
EC
2563 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2564 if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
ace92524
EC
2565 err = setup_cvq_vring(mvdev);
2566 if (err) {
2567 mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2568 goto err_setup;
2569 }
ae0428de 2570 err = setup_driver(mvdev);
1a86b377
EC
2571 if (err) {
2572 mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2573 goto err_setup;
2574 }
2575 } else {
2576 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
1c80cf03 2577 goto err_clear;
1a86b377
EC
2578 }
2579 }
2580
2581 ndev->mvdev.status = status;
759ae7f9 2582 up_write(&ndev->reslock);
1a86b377
EC
2583 return;
2584
2585err_setup:
2586 mlx5_vdpa_destroy_mr(&ndev->mvdev);
2587 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
1c80cf03 2588err_clear:
759ae7f9 2589 up_write(&ndev->reslock);
1a86b377
EC
2590}
2591
8fcd20c3
EC
2592static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2593{
2594 int i;
2595
 2596 /* By default, all groups are mapped to asid 0 */
2597 for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2598 mvdev->group2asid[i] = 0;
2599}
2600
0686082d
XY
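/* Full device reset: tear down the data path, clear virtqueue and control VQ
 * state, destroy the MR, zero the negotiated features and counters, and
 * recreate a default MR when the firmware supports umem_uid_0.
 */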
2601static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2602{
2603 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2604 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2605
2606 print_status(mvdev, 0, true);
2607 mlx5_vdpa_info(mvdev, "performing device reset\n");
1c80cf03 2608
759ae7f9 2609 down_write(&ndev->reslock);
0686082d
XY
2610 teardown_driver(ndev);
2611 clear_vqs_ready(ndev);
2612 mlx5_vdpa_destroy_mr(&ndev->mvdev);
2613 ndev->mvdev.status = 0;
09e65ee9 2614 ndev->mvdev.suspended = false;
b03fc43e 2615 ndev->cur_num_vqs = 0;
1892a3d4
EC
2616 ndev->mvdev.cvq.received_desc = 0;
2617 ndev->mvdev.cvq.completed_desc = 0;
75560522 2618 memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
0686082d 2619 ndev->mvdev.actual_features = 0;
8fcd20c3 2620 init_group_to_asid_map(mvdev);
0686082d 2621 ++mvdev->generation;
8fcd20c3 2622
0686082d 2623 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
38fc462f 2624 if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
0686082d
XY
2625 mlx5_vdpa_warn(mvdev, "create MR failed\n");
2626 }
759ae7f9 2627 up_write(&ndev->reslock);
0686082d
XY
2628
2629 return 0;
2630}
2631
442706f9
SG
2632static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2633{
2634 return sizeof(struct virtio_net_config);
2635}
2636
1a86b377
EC
2637static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2638 unsigned int len)
2639{
2640 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2641 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2642
dcfde163 2643 if (offset + len <= sizeof(struct virtio_net_config))
2874211f 2644 memcpy(buf, (u8 *)&ndev->config + offset, len);
1a86b377
EC
2645}
2646
2647static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2648 unsigned int len)
2649{
2650 /* not supported */
2651}
2652
2653static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2654{
2655 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2656
2657 return mvdev->generation;
2658}
2659
38fc462f
EC
2660static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2661 unsigned int asid)
1a86b377 2662{
1a86b377
EC
2663 bool change_map;
2664 int err;
2665
38fc462f 2666 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
1a86b377
EC
2667 if (err) {
2668 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
8fcd20c3 2669 return err;
1a86b377
EC
2670 }
2671
2672 if (change_map)
38fc462f 2673 err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
1a86b377 2674
8fcd20c3
EC
2675 return err;
2676}
2677
2678static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2679 struct vhost_iotlb *iotlb)
2680{
2681 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2682 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
93e530d2 2683 int err = -EINVAL;
8fcd20c3
EC
2684
2685 down_write(&ndev->reslock);
38fc462f 2686 err = set_map_data(mvdev, iotlb, asid);
759ae7f9 2687 up_write(&ndev->reslock);
1c80cf03 2688 return err;
1a86b377
EC
2689}
2690
36871fb9
JW
2691static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
2692{
2693 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2694
2695 if (is_ctrl_vq_idx(mvdev, idx))
2696 return &vdev->dev;
2697
2698 return mvdev->vdev.dma_dev;
2699}
2700
1a86b377
EC
2701static void mlx5_vdpa_free(struct vdpa_device *vdev)
2702{
2703 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
7c9f131f 2704 struct mlx5_core_dev *pfmdev;
1a86b377
EC
2705 struct mlx5_vdpa_net *ndev;
2706
2707 ndev = to_mlx5_vdpa_ndev(mvdev);
2708
2709 free_resources(ndev);
6f5312f8 2710 mlx5_vdpa_destroy_mr(mvdev);
7c9f131f
EC
2711 if (!is_zero_ether_addr(ndev->config.mac)) {
2712 pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
2713 mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
2714 }
1a86b377 2715 mlx5_vdpa_free_resources(&ndev->mvdev);
75560522
EC
2716 kfree(ndev->event_cbs);
2717 kfree(ndev->vqs);
1a86b377
EC
2718}
2719
2720static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
2721{
b57c46cb 2722 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
1a86b377 2723 struct vdpa_notification_area ret = {};
b57c46cb
EC
2724 struct mlx5_vdpa_net *ndev;
2725 phys_addr_t addr;
1a86b377 2726
5262912e 2727 if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
e4fc6650
EC
2728 return ret;
2729
b57c46cb
EC
2730 /* If SF BAR size is smaller than PAGE_SIZE, do not use direct
 2731 * notification to avoid the risk of mapping pages that contain the BAR
 2732 * of more than one SF
2733 */
2734 if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
2735 return ret;
2736
2737 ndev = to_mlx5_vdpa_ndev(mvdev);
2738 addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
2739 ret.addr = addr;
2740 ret.size = PAGE_SIZE;
1a86b377
EC
2741 return ret;
2742}
2743
2744static int mlx5_get_vq_irq(struct vdpa_device *vdv, u16 idx)
2745{
2746 return -EOPNOTSUPP;
2747}
2748
a64917bc
EC
2749static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
2750{
2751 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2752
2753 return mvdev->actual_features;
2754}
2755
1892a3d4
EC
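/* Query the virtio_q counters object attached to a virtqueue to read the
 * number of received and completed descriptors reported by firmware.
 */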
2756static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
2757 u64 *received_desc, u64 *completed_desc)
2758{
2759 u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
2760 u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
2761 void *cmd_hdr;
2762 void *ctx;
2763 int err;
2764
2765 if (!counters_supported(&ndev->mvdev))
2766 return -EOPNOTSUPP;
2767
2768 if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
2769 return -EAGAIN;
2770
2771 cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
2772
2773 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
2774 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
2775 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
2776 MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
2777
2778 err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
2779 if (err)
2780 return err;
2781
2782 ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
2783 *received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
2784 *completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
2785 return 0;
2786}
2787
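/* Report per-virtqueue descriptor counters ("received_desc" and
 * "completed_desc") through the vdpa vendor statistics netlink interface.
 */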
2788static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
2789 struct sk_buff *msg,
2790 struct netlink_ext_ack *extack)
2791{
2792 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2793 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2794 struct mlx5_vdpa_virtqueue *mvq;
2795 struct mlx5_control_vq *cvq;
2796 u64 received_desc;
2797 u64 completed_desc;
2798 int err = 0;
2799
759ae7f9 2800 down_read(&ndev->reslock);
1892a3d4
EC
2801 if (!is_index_valid(mvdev, idx)) {
2802 NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
2803 err = -EINVAL;
2804 goto out_err;
2805 }
2806
2807 if (idx == ctrl_vq_idx(mvdev)) {
2808 cvq = &mvdev->cvq;
2809 received_desc = cvq->received_desc;
2810 completed_desc = cvq->completed_desc;
2811 goto out;
2812 }
2813
2814 mvq = &ndev->vqs[idx];
2815 err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
2816 if (err) {
2817 NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
2818 goto out_err;
2819 }
2820
2821out:
2822 err = -EMSGSIZE;
2823 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
2824 goto out_err;
2825
2826 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
2827 VDPA_ATTR_PAD))
2828 goto out_err;
2829
2830 if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
2831 goto out_err;
2832
2833 if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
2834 VDPA_ATTR_PAD))
2835 goto out_err;
2836
2837 err = 0;
2838out_err:
759ae7f9 2839 up_read(&ndev->reslock);
1892a3d4
EC
2840 return err;
2841}
2842
cae15c2e
EC
2843static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
2844{
2845 struct mlx5_control_vq *cvq;
2846
2847 if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2848 return;
2849
2850 cvq = &mvdev->cvq;
2851 cvq->ready = false;
2852}
2853
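/* Suspend the device: unregister the event notifier, flush pending work,
 * suspend all data virtqueues and the control VQ, and mark the device
 * suspended.
 */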
2854static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
2855{
2856 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2857 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2858 struct mlx5_vdpa_virtqueue *mvq;
2859 int i;
2860
09e65ee9
SWL
2861 mlx5_vdpa_info(mvdev, "suspending device\n");
2862
cae15c2e 2863 down_write(&ndev->reslock);
cae15c2e 2864 ndev->nb_registered = false;
0dbc1b4a 2865 mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
cae15c2e
EC
2866 flush_workqueue(ndev->mvdev.wq);
2867 for (i = 0; i < ndev->cur_num_vqs; i++) {
2868 mvq = &ndev->vqs[i];
2869 suspend_vq(ndev, mvq);
2870 }
2871 mlx5_vdpa_cvq_suspend(mvdev);
09e65ee9 2872 mvdev->suspended = true;
cae15c2e
EC
2873 up_write(&ndev->reslock);
2874 return 0;
2875}
2876
8fcd20c3
EC
2877static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
2878 unsigned int asid)
2879{
2880 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2881
2882 if (group >= MLX5_VDPA_NUMVQ_GROUPS)
2883 return -EINVAL;
2884
2885 mvdev->group2asid[group] = asid;
2886 return 0;
2887}
2888
1a86b377
EC
2889static const struct vdpa_config_ops mlx5_vdpa_ops = {
2890 .set_vq_address = mlx5_vdpa_set_vq_address,
2891 .set_vq_num = mlx5_vdpa_set_vq_num,
2892 .kick_vq = mlx5_vdpa_kick_vq,
2893 .set_vq_cb = mlx5_vdpa_set_vq_cb,
2894 .set_vq_ready = mlx5_vdpa_set_vq_ready,
2895 .get_vq_ready = mlx5_vdpa_get_vq_ready,
2896 .set_vq_state = mlx5_vdpa_set_vq_state,
2897 .get_vq_state = mlx5_vdpa_get_vq_state,
1892a3d4 2898 .get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
1a86b377
EC
2899 .get_vq_notification = mlx5_get_vq_notification,
2900 .get_vq_irq = mlx5_get_vq_irq,
2901 .get_vq_align = mlx5_vdpa_get_vq_align,
d4821902 2902 .get_vq_group = mlx5_vdpa_get_vq_group,
a64917bc
EC
2903 .get_device_features = mlx5_vdpa_get_device_features,
2904 .set_driver_features = mlx5_vdpa_set_driver_features,
2905 .get_driver_features = mlx5_vdpa_get_driver_features,
1a86b377
EC
2906 .set_config_cb = mlx5_vdpa_set_config_cb,
2907 .get_vq_num_max = mlx5_vdpa_get_vq_num_max,
2908 .get_device_id = mlx5_vdpa_get_device_id,
2909 .get_vendor_id = mlx5_vdpa_get_vendor_id,
2910 .get_status = mlx5_vdpa_get_status,
2911 .set_status = mlx5_vdpa_set_status,
0686082d 2912 .reset = mlx5_vdpa_reset,
442706f9 2913 .get_config_size = mlx5_vdpa_get_config_size,
1a86b377
EC
2914 .get_config = mlx5_vdpa_get_config,
2915 .set_config = mlx5_vdpa_set_config,
2916 .get_generation = mlx5_vdpa_get_generation,
2917 .set_map = mlx5_vdpa_set_map,
8fcd20c3 2918 .set_group_asid = mlx5_set_group_asid,
36871fb9 2919 .get_vq_dma_dev = mlx5_get_vq_dma_dev,
1a86b377 2920 .free = mlx5_vdpa_free,
cae15c2e 2921 .suspend = mlx5_vdpa_suspend,
1a86b377
EC
2922};
2923
d084d996
SWL
2924static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
2925{
2926 u16 hw_mtu;
2927 int err;
2928
2929 err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2930 if (err)
2931 return err;
2932
2933 *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
2934 return 0;
2935}
2936
1a86b377
EC
2937static int alloc_resources(struct mlx5_vdpa_net *ndev)
2938{
2939 struct mlx5_vdpa_net_resources *res = &ndev->res;
2940 int err;
2941
2942 if (res->valid) {
2943 mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
2944 return -EEXIST;
2945 }
2946
2947 err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
2948 if (err)
2949 return err;
2950
2951 err = create_tis(ndev);
2952 if (err)
2953 goto err_tis;
2954
2955 res->valid = true;
2956
2957 return 0;
2958
2959err_tis:
2960 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2961 return err;
2962}
2963
2964static void free_resources(struct mlx5_vdpa_net *ndev)
2965{
2966 struct mlx5_vdpa_net_resources *res = &ndev->res;
2967
2968 if (!res->valid)
2969 return;
2970
2971 destroy_tis(ndev);
2972 mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
2973 res->valid = false;
2974}
2975
2976static void init_mvqs(struct mlx5_vdpa_net *ndev)
2977{
2978 struct mlx5_vdpa_virtqueue *mvq;
2979 int i;
2980
acde3929 2981 for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
1a86b377
EC
2982 mvq = &ndev->vqs[i];
2983 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2984 mvq->index = i;
2985 mvq->ndev = ndev;
2986 mvq->fwqp.fw = true;
cae15c2e 2987 mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
1a86b377
EC
2988 }
2989 for (; i < ndev->mvdev.max_vqs; i++) {
2990 mvq = &ndev->vqs[i];
2991 memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2992 mvq->index = i;
2993 mvq->ndev = ndev;
2994 }
2995}
2996
58926c8a
EC
2997struct mlx5_vdpa_mgmtdev {
2998 struct vdpa_mgmt_dev mgtdev;
2999 struct mlx5_adev *madev;
3000 struct mlx5_vdpa_net *ndev;
3001};
3002
edf747af
EC
3003static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
3004{
3005 u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
3006 u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
3007 int err;
3008
3009 MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
3010 MLX5_SET(query_vport_state_in, in, op_mod, opmod);
3011 MLX5_SET(query_vport_state_in, in, vport_number, vport);
3012 if (vport)
3013 MLX5_SET(query_vport_state_in, in, other_vport, 1);
3014
3015 err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
3016 if (err)
3017 return 0;
3018
3019 return MLX5_GET(query_vport_state_out, out, state);
3020}
3021
3022static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
3023{
3024 if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
3025 VPORT_STATE_UP)
3026 return true;
3027
3028 return false;
3029}
3030
3031static void update_carrier(struct work_struct *work)
3032{
3033 struct mlx5_vdpa_wq_ent *wqent;
3034 struct mlx5_vdpa_dev *mvdev;
3035 struct mlx5_vdpa_net *ndev;
3036
3037 wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
3038 mvdev = wqent->mvdev;
3039 ndev = to_mlx5_vdpa_ndev(mvdev);
3040 if (get_link_state(mvdev))
3041 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3042 else
3043 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3044
0dbc1b4a 3045 if (ndev->nb_registered && ndev->config_cb.callback)
edf747af
EC
3046 ndev->config_cb.callback(ndev->config_cb.private);
3047
3048 kfree(wqent);
3049}
3050
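/* Firmware event notifier: on port state change events, queue
 * update_carrier() to refresh the link status bit in the config space and
 * invoke the config change callback.
 */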
3051static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
3052{
3053 struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
3054 struct mlx5_eqe *eqe = param;
3055 int ret = NOTIFY_DONE;
3056 struct mlx5_vdpa_wq_ent *wqent;
3057
3058 if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
033779a7
SWL
3059 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
3060 return NOTIFY_DONE;
edf747af
EC
3061 switch (eqe->sub_type) {
3062 case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
3063 case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
3064 wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
0dbc1b4a 3065 if (!wqent)
edf747af
EC
3066 return NOTIFY_DONE;
3067
3068 wqent->mvdev = &ndev->mvdev;
3069 INIT_WORK(&wqent->work, update_carrier);
3070 queue_work(ndev->mvdev.wq, &wqent->work);
3071 ret = NOTIFY_OK;
3072 break;
3073 default:
3074 return NOTIFY_DONE;
3075 }
3076 return ret;
3077 }
3078 return ret;
3079}
3080
1e00e821
EC
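/* Program the requested MTU into the NIC vport context, accounting for the
 * Ethernet L2 header overhead.
 */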
3081static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3082{
3083 int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3084 void *in;
3085 int err;
3086
3087 in = kvzalloc(inlen, GFP_KERNEL);
3088 if (!in)
3089 return -ENOMEM;
3090
3091 MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3092 MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3093 mtu + MLX5V_ETH_HARD_MTU);
3094 MLX5_SET(modify_nic_vport_context_in, in, opcode,
3095 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3096
3097 err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3098
3099 kvfree(in);
3100 return err;
3101}
3102
d8ca2fa5
PP
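/* vdpa management op to create a new net vdpa device: validate the
 * provisioned features and max VQ pairs, allocate and initialize the net
 * device, fill the virtio config space, create resources, MR and workqueue,
 * register the event notifier, and finally register the vdpa device.
 */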
3103static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3104 const struct vdpa_dev_set_config *add_config)
1a86b377 3105{
58926c8a 3106 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
1a86b377 3107 struct virtio_net_config *config;
7c9f131f 3108 struct mlx5_core_dev *pfmdev;
1a86b377
EC
3109 struct mlx5_vdpa_dev *mvdev;
3110 struct mlx5_vdpa_net *ndev;
58926c8a 3111 struct mlx5_core_dev *mdev;
deeacf35 3112 u64 device_features;
1a86b377 3113 u32 max_vqs;
246fd1ca 3114 u16 mtu;
1a86b377
EC
3115 int err;
3116
58926c8a
EC
3117 if (mgtdev->ndev)
3118 return -ENOSPC;
3119
3120 mdev = mgtdev->madev->mdev;
deeacf35
SWL
3121 device_features = mgtdev->mgtdev.supported_features;
3122 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3123 if (add_config->device_features & ~device_features) {
3124 dev_warn(mdev->device,
3125 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3126 add_config->device_features, device_features);
3127 return -EINVAL;
3128 }
3129 device_features &= add_config->device_features;
3130 }
3131 if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3132 device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3133 dev_warn(mdev->device,
3134 "Must provision minimum features 0x%llx for this device",
3135 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3136 return -EOPNOTSUPP;
3137 }
3138
879753c8
EC
3139 if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3140 MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3141 dev_warn(mdev->device, "missing support for split virtqueues\n");
3142 return -EOPNOTSUPP;
3143 }
3144
acde3929
EC
3145 max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3146 1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
75560522
EC
3147 if (max_vqs < 2) {
3148 dev_warn(mdev->device,
3149 "%d virtqueues are supported. At least 2 are required\n",
3150 max_vqs);
3151 return -EAGAIN;
3152 }
3153
3154 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3155 if (add_config->net.max_vq_pairs > max_vqs / 2)
3156 return -EINVAL;
3157 max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3158 } else {
3159 max_vqs = 2;
3160 }
1a86b377
EC
3161
3162 ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
8fcd20c3 3163 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
1a86b377 3164 if (IS_ERR(ndev))
74c9729d 3165 return PTR_ERR(ndev);
1a86b377
EC
3166
3167 ndev->mvdev.max_vqs = max_vqs;
3168 mvdev = &ndev->mvdev;
3169 mvdev->mdev = mdev;
75560522
EC
3170
3171 ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3172 ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3173 if (!ndev->vqs || !ndev->event_cbs) {
3174 err = -ENOMEM;
3175 goto err_alloc;
3176 }
3177
1a86b377 3178 init_mvqs(ndev);
759ae7f9 3179 init_rwsem(&ndev->reslock);
1a86b377 3180 config = &ndev->config;
1e00e821
EC
3181
3182 if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3183 err = config_func_mtu(mdev, add_config->net.mtu);
3184 if (err)
759ae7f9 3185 goto err_alloc;
1e00e821
EC
3186 }
3187
deeacf35 3188 if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
033779a7
SWL
3189 err = query_mtu(mdev, &mtu);
3190 if (err)
3191 goto err_alloc;
1a86b377 3192
033779a7
SWL
3193 ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3194 }
1a86b377 3195
deeacf35 3196 if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
033779a7
SWL
3197 if (get_link_state(mvdev))
3198 ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3199 else
3200 ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3201 }
edf747af 3202
a007d940
EC
3203 if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3204 memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
deeacf35
SWL
 3205 /* Don't bother setting the mac address in config if we're not going to provision _F_MAC */
3206 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3207 device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
a007d940
EC
3208 err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3209 if (err)
759ae7f9 3210 goto err_alloc;
a007d940 3211 }
1a86b377 3212
7c9f131f
EC
3213 if (!is_zero_ether_addr(config->mac)) {
3214 pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3215 err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3216 if (err)
759ae7f9 3217 goto err_alloc;
deeacf35
SWL
3218 } else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3219 /*
 3220 * We used to clear the _F_MAC feature bit on seeing a
 3221 * zero mac address when device features were not
 3222 * specifically provisioned. Keep that behaviour
 3223 * so old scripts do not break.
3224 */
3225 device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3226 } else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3227 /* Don't provision zero mac address for _F_MAC */
3228 mlx5_vdpa_warn(&ndev->mvdev,
3229 "No mac address provisioned?\n");
3230 err = -EINVAL;
3231 goto err_alloc;
7c9f131f
EC
3232 }
3233
deeacf35
SWL
3234 if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3235 config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3236
3237 ndev->mvdev.mlx_features = device_features;
7d23dcdf 3238 mvdev->vdev.dma_dev = &mdev->pdev->dev;
1a86b377
EC
3239 err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3240 if (err)
7c9f131f 3241 goto err_mpfs;
1a86b377 3242
6f5312f8 3243 if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
38fc462f 3244 err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
6f5312f8
EC
3245 if (err)
3246 goto err_res;
3247 }
3248
1a86b377
EC
3249 err = alloc_resources(ndev);
3250 if (err)
6f5312f8 3251 goto err_mr;
1a86b377 3252
55ebf0d6
JW
3253 ndev->cvq_ent.mvdev = mvdev;
3254 INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
218bdd20 3255 mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
5262912e
EC
3256 if (!mvdev->wq) {
3257 err = -ENOMEM;
3258 goto err_res2;
3259 }
3260
edf747af
EC
3261 ndev->nb.notifier_call = event_handler;
3262 mlx5_notifier_register(mdev, &ndev->nb);
cae15c2e 3263 ndev->nb_registered = true;
58926c8a 3264 mvdev->vdev.mdev = &mgtdev->mgtdev;
acde3929 3265 err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
1a86b377
EC
3266 if (err)
3267 goto err_reg;
3268
58926c8a 3269 mgtdev->ndev = ndev;
74c9729d 3270 return 0;
1a86b377
EC
3271
3272err_reg:
5262912e
EC
3273 destroy_workqueue(mvdev->wq);
3274err_res2:
1a86b377 3275 free_resources(ndev);
6f5312f8
EC
3276err_mr:
3277 mlx5_vdpa_destroy_mr(mvdev);
1a86b377
EC
3278err_res:
3279 mlx5_vdpa_free_resources(&ndev->mvdev);
7c9f131f
EC
3280err_mpfs:
3281 if (!is_zero_ether_addr(config->mac))
3282 mlx5_mpfs_del_mac(pfmdev, config->mac);
75560522 3283err_alloc:
1a86b377 3284 put_device(&mvdev->vdev.dev);
74c9729d 3285 return err;
1a86b377
EC
3286}
3287
58926c8a
EC
3288static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3289{
3290 struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
5262912e 3291 struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
edf747af 3292 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
ad6dc1da 3293 struct workqueue_struct *wq;
58926c8a 3294
29422100
EC
3295 mlx5_vdpa_remove_debugfs(ndev->debugfs);
3296 ndev->debugfs = NULL;
cae15c2e 3297 if (ndev->nb_registered) {
cae15c2e 3298 ndev->nb_registered = false;
0dbc1b4a 3299 mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
cae15c2e 3300 }
ad6dc1da
EC
3301 wq = mvdev->wq;
3302 mvdev->wq = NULL;
3303 destroy_workqueue(wq);
58926c8a
EC
3304 _vdpa_unregister_device(dev);
3305 mgtdev->ndev = NULL;
3306}
3307
3308static const struct vdpa_mgmtdev_ops mdev_ops = {
3309 .dev_add = mlx5_vdpa_dev_add,
3310 .dev_del = mlx5_vdpa_dev_del,
3311};
3312
3313static struct virtio_device_id id_table[] = {
3314 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3315 { 0 },
3316};
3317
3318static int mlx5v_probe(struct auxiliary_device *adev,
3319 const struct auxiliary_device_id *id)
3320
3321{
3322 struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3323 struct mlx5_core_dev *mdev = madev->mdev;
3324 struct mlx5_vdpa_mgmtdev *mgtdev;
3325 int err;
3326
3327 mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3328 if (!mgtdev)
3329 return -ENOMEM;
3330
3331 mgtdev->mgtdev.ops = &mdev_ops;
3332 mgtdev->mgtdev.device = mdev->device;
3333 mgtdev->mgtdev.id_table = id_table;
75560522 3334 mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
1e00e821 3335 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
deeacf35
SWL
3336 BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3337 BIT_ULL(VDPA_ATTR_DEV_FEATURES);
79de65ed
EC
3338 mgtdev->mgtdev.max_supported_vqs =
3339 MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3340 mgtdev->mgtdev.supported_features = get_supported_features(mdev);
58926c8a
EC
3341 mgtdev->madev = madev;
3342
3343 err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3344 if (err)
3345 goto reg_err;
3346
45e3a279 3347 auxiliary_set_drvdata(adev, mgtdev);
58926c8a
EC
3348
3349 return 0;
3350
3351reg_err:
3352 kfree(mgtdev);
3353 return err;
3354}
3355
74c9729d 3356static void mlx5v_remove(struct auxiliary_device *adev)
1a86b377 3357{
58926c8a 3358 struct mlx5_vdpa_mgmtdev *mgtdev;
74c9729d 3359
45e3a279 3360 mgtdev = auxiliary_get_drvdata(adev);
58926c8a
EC
3361 vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3362 kfree(mgtdev);
1a86b377 3363}
74c9729d
LR
3364
3365static const struct auxiliary_device_id mlx5v_id_table[] = {
3366 { .name = MLX5_ADEV_NAME ".vnet", },
3367 {},
3368};
3369
3370MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3371
3372static struct auxiliary_driver mlx5v_driver = {
3373 .name = "vnet",
3374 .probe = mlx5v_probe,
3375 .remove = mlx5v_remove,
3376 .id_table = mlx5v_id_table,
3377};
3378
3379module_auxiliary_driver(mlx5v_driver);