1 /*
2  * Copyright (c) 2015-2016, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <net/tc_act/tc_gact.h>
34 #include <linux/mlx5/fs.h>
35 #include <net/vxlan.h>
36 #include <net/geneve.h>
37 #include <linux/bpf.h>
38 #include <linux/debugfs.h>
39 #include <linux/if_bridge.h>
40 #include <linux/filter.h>
41 #include <net/page_pool.h>
42 #include <net/pkt_sched.h>
43 #include <net/xdp_sock_drv.h>
44 #include "eswitch.h"
45 #include "en.h"
46 #include "en/txrx.h"
47 #include "en_tc.h"
48 #include "en_rep.h"
49 #include "en_accel/ipsec.h"
50 #include "en_accel/macsec.h"
51 #include "en_accel/en_accel.h"
52 #include "en_accel/ktls.h"
53 #include "lib/vxlan.h"
54 #include "lib/clock.h"
55 #include "en/port.h"
56 #include "en/xdp.h"
57 #include "lib/eq.h"
58 #include "en/monitor_stats.h"
59 #include "en/health.h"
60 #include "en/params.h"
61 #include "en/xsk/pool.h"
62 #include "en/xsk/setup.h"
63 #include "en/xsk/rx.h"
64 #include "en/xsk/tx.h"
65 #include "en/hv_vhca_stats.h"
66 #include "en/devlink.h"
67 #include "lib/mlx5.h"
68 #include "en/ptp.h"
69 #include "en/htb.h"
70 #include "qos.h"
71 #include "en/trap.h"
72
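/* Check whether the device can use UMR-based striding RQ (MPWQE) with the
 * given page shift and UMR mode: the required capability bits must be set,
 * and a single UMR WQE for this configuration must fit within the maximum
 * SQ WQE size.
 */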
73 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift,
74                                             enum mlx5e_mpwrq_umr_mode umr_mode)
75 {
76         u16 umr_wqebbs, max_wqebbs;
77         bool striding_rq_umr;
78
79         striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) && MLX5_CAP_GEN(mdev, umr_ptr_rlky) &&
80                           MLX5_CAP_ETH(mdev, reg_umr_sq);
81         if (!striding_rq_umr)
82                 return false;
83
84         umr_wqebbs = mlx5e_mpwrq_umr_wqebbs(mdev, page_shift, umr_mode);
85         max_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
86         /* Sanity check; should never happen, because mlx5e_mpwrq_umr_wqebbs is
87          * calculated from mlx5e_get_max_sq_aligned_wqebbs.
88          */
89         if (WARN_ON(umr_wqebbs > max_wqebbs))
90                 return false;
91
92         return true;
93 }
94
95 void mlx5e_update_carrier(struct mlx5e_priv *priv)
96 {
97         struct mlx5_core_dev *mdev = priv->mdev;
98         u8 port_state;
99         bool up;
100
101         port_state = mlx5_query_vport_state(mdev,
102                                             MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT,
103                                             0);
104
105         up = port_state == VPORT_STATE_UP;
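        /* If the queried state matches the current carrier state, still
         * generate a carrier event so that a link flap too quick to change
         * the observed state is reported to user space.
         */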
106         if (up == netif_carrier_ok(priv->netdev))
107                 netif_carrier_event(priv->netdev);
108         if (up) {
109                 netdev_info(priv->netdev, "Link up\n");
110                 netif_carrier_on(priv->netdev);
111         } else {
112                 netdev_info(priv->netdev, "Link down\n");
113                 netif_carrier_off(priv->netdev);
114         }
115 }
116
117 static void mlx5e_update_carrier_work(struct work_struct *work)
118 {
119         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
120                                                update_carrier_work);
121
122         mutex_lock(&priv->state_lock);
123         if (test_bit(MLX5E_STATE_OPENED, &priv->state))
124                 if (priv->profile->update_carrier)
125                         priv->profile->update_carrier(priv);
126         mutex_unlock(&priv->state_lock);
127 }
128
129 static void mlx5e_update_stats_work(struct work_struct *work)
130 {
131         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
132                                                update_stats_work);
133
134         mutex_lock(&priv->state_lock);
135         priv->profile->update_stats(priv);
136         mutex_unlock(&priv->state_lock);
137 }
138
139 void mlx5e_queue_update_stats(struct mlx5e_priv *priv)
140 {
141         if (!priv->profile->update_stats)
142                 return;
143
144         if (unlikely(test_bit(MLX5E_STATE_DESTROYING, &priv->state)))
145                 return;
146
147         queue_work(priv->wq, &priv->update_stats_work);
148 }
149
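/* Async event notifier: port up/down events schedule the carrier update work;
 * all other events are ignored.
 */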
150 static int async_event(struct notifier_block *nb, unsigned long event, void *data)
151 {
152         struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, events_nb);
153         struct mlx5_eqe   *eqe = data;
154
155         if (event != MLX5_EVENT_TYPE_PORT_CHANGE)
156                 return NOTIFY_DONE;
157
158         switch (eqe->sub_type) {
159         case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
160         case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
161                 queue_work(priv->wq, &priv->update_carrier_work);
162                 break;
163         default:
164                 return NOTIFY_DONE;
165         }
166
167         return NOTIFY_OK;
168 }
169
170 static void mlx5e_enable_async_events(struct mlx5e_priv *priv)
171 {
172         priv->events_nb.notifier_call = async_event;
173         mlx5_notifier_register(priv->mdev, &priv->events_nb);
174 }
175
176 static void mlx5e_disable_async_events(struct mlx5e_priv *priv)
177 {
178         mlx5_notifier_unregister(priv->mdev, &priv->events_nb);
179 }
180
181 static int blocking_event(struct notifier_block *nb, unsigned long event, void *data)
182 {
183         struct mlx5e_priv *priv = container_of(nb, struct mlx5e_priv, blocking_events_nb);
184         struct mlx5_devlink_trap_event_ctx *trap_event_ctx = data;
185         int err;
186
187         switch (event) {
188         case MLX5_DRIVER_EVENT_TYPE_TRAP:
189                 err = mlx5e_handle_trap_event(priv, trap_event_ctx->trap);
190                 if (err) {
191                         trap_event_ctx->err = err;
192                         return NOTIFY_BAD;
193                 }
194                 break;
195         default:
196                 return NOTIFY_DONE;
197         }
198         return NOTIFY_OK;
199 }
200
201 static void mlx5e_enable_blocking_events(struct mlx5e_priv *priv)
202 {
203         priv->blocking_events_nb.notifier_call = blocking_event;
204         mlx5_blocking_notifier_register(priv->mdev, &priv->blocking_events_nb);
205 }
206
207 static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
208 {
209         mlx5_blocking_notifier_unregister(priv->mdev, &priv->blocking_events_nb);
210 }
211
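/* Convert a number of UMR translation entries into octowords (16-byte units),
 * padding the byte size up to the UMR flex alignment first.
 */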
212 static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode)
213 {
214         u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
215         u32 sz;
216
217         sz = ALIGN(entries * umr_entry_size, MLX5_UMR_FLEX_ALIGNMENT);
218
219         return sz / MLX5_OCTWORD;
220 }
221
222 static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
223                                        struct mlx5e_icosq *sq,
224                                        struct mlx5e_umr_wqe *wqe)
225 {
226         struct mlx5_wqe_ctrl_seg      *cseg = &wqe->ctrl;
227         struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
228         u16 octowords;
229         u8 ds_cnt;
230
231         ds_cnt = DIV_ROUND_UP(mlx5e_mpwrq_umr_wqe_sz(rq->mdev, rq->mpwqe.page_shift,
232                                                      rq->mpwqe.umr_mode),
233                               MLX5_SEND_WQE_DS);
234
235         cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
236                                       ds_cnt);
237         cseg->umr_mkey  = rq->mpwqe.umr_mkey_be;
238
239         ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
240         octowords = mlx5e_mpwrq_umr_octowords(rq->mpwqe.pages_per_wqe, rq->mpwqe.umr_mode);
241         ucseg->xlt_octowords = cpu_to_be16(octowords);
242         ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
243 }
244
245 static int mlx5e_rq_shampo_hd_alloc(struct mlx5e_rq *rq, int node)
246 {
247         rq->mpwqe.shampo = kvzalloc_node(sizeof(*rq->mpwqe.shampo),
248                                          GFP_KERNEL, node);
249         if (!rq->mpwqe.shampo)
250                 return -ENOMEM;
251         return 0;
252 }
253
254 static void mlx5e_rq_shampo_hd_free(struct mlx5e_rq *rq)
255 {
256         kvfree(rq->mpwqe.shampo);
257 }
258
259 static int mlx5e_rq_shampo_hd_info_alloc(struct mlx5e_rq *rq, int node)
260 {
261         struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
262
263         shampo->bitmap = bitmap_zalloc_node(shampo->hd_per_wq, GFP_KERNEL,
264                                             node);
265         if (!shampo->bitmap)
266                 return -ENOMEM;
267
268         shampo->info = kvzalloc_node(array_size(shampo->hd_per_wq,
269                                                 sizeof(*shampo->info)),
270                                      GFP_KERNEL, node);
271         if (!shampo->info) {
272                 kvfree(shampo->bitmap);
273                 return -ENOMEM;
274         }
275         return 0;
276 }
277
278 static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq)
279 {
280         kvfree(rq->mpwqe.shampo->bitmap);
281         kvfree(rq->mpwqe.shampo->info);
282 }
283
284 static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
285 {
286         int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
287         size_t alloc_size;
288
289         alloc_size = array_size(wq_sz, struct_size(rq->mpwqe.info, alloc_units,
290                                                    rq->mpwqe.pages_per_wqe));
291
292         rq->mpwqe.info = kvzalloc_node(alloc_size, GFP_KERNEL, node);
293         if (!rq->mpwqe.info)
294                 return -ENOMEM;
295
296         mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
297
298         return 0;
299 }
300
302 static u8 mlx5e_mpwrq_access_mode(enum mlx5e_mpwrq_umr_mode umr_mode)
303 {
304         switch (umr_mode) {
305         case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
306                 return MLX5_MKC_ACCESS_MODE_MTT;
307         case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
308                 return MLX5_MKC_ACCESS_MODE_KSM;
309         case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
310                 return MLX5_MKC_ACCESS_MODE_KLMS;
311         case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
312                 return MLX5_MKC_ACCESS_MODE_KSM;
313         }
314         WARN_ONCE(1, "MPWRQ UMR mode %d is not known\n", umr_mode);
315         return 0;
316 }
317
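/* Create the UMR mkey used by striding RQ. Depending on the UMR mode, the
 * translation entries are MTTs, KSMs or KLMs, and they initially all point
 * at the filler page; RX UMR WQEs later re-point them at real buffers.
 */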
318 static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
319                                  u32 npages, u8 page_shift, u32 *umr_mkey,
320                                  dma_addr_t filler_addr,
321                                  enum mlx5e_mpwrq_umr_mode umr_mode,
322                                  u32 xsk_chunk_size)
323 {
324         struct mlx5_mtt *mtt;
325         struct mlx5_ksm *ksm;
326         struct mlx5_klm *klm;
327         u32 octwords;
328         int inlen;
329         void *mkc;
330         u32 *in;
331         int err;
332         int i;
333
334         if ((umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED ||
335              umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE) &&
336             !MLX5_CAP_GEN(mdev, fixed_buffer_size)) {
337                 mlx5_core_warn(mdev, "Unaligned AF_XDP requires fixed_buffer_size capability\n");
338                 return -EINVAL;
339         }
340
341         octwords = mlx5e_mpwrq_umr_octowords(npages, umr_mode);
342
343         inlen = MLX5_FLEXIBLE_INLEN(mdev, MLX5_ST_SZ_BYTES(create_mkey_in),
344                                     MLX5_OCTWORD, octwords);
345         if (inlen < 0)
346                 return inlen;
347
348         in = kvzalloc(inlen, GFP_KERNEL);
349         if (!in)
350                 return -ENOMEM;
351
352         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
353
354         MLX5_SET(mkc, mkc, free, 1);
355         MLX5_SET(mkc, mkc, umr_en, 1);
356         MLX5_SET(mkc, mkc, lw, 1);
357         MLX5_SET(mkc, mkc, lr, 1);
358         MLX5_SET(mkc, mkc, access_mode_1_0, mlx5e_mpwrq_access_mode(umr_mode));
359         mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
360         MLX5_SET(mkc, mkc, qpn, 0xffffff);
361         MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
362         MLX5_SET64(mkc, mkc, len, npages << page_shift);
363         MLX5_SET(mkc, mkc, translations_octword_size, octwords);
364         if (umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)
365                 MLX5_SET(mkc, mkc, log_page_size, page_shift - 2);
366         else if (umr_mode != MLX5E_MPWRQ_UMR_MODE_OVERSIZED)
367                 MLX5_SET(mkc, mkc, log_page_size, page_shift);
368         MLX5_SET(create_mkey_in, in, translations_octword_actual_size, octwords);
369
370         /* Initialize the mkey with all MTTs pointing to a default
371          * page (filler_addr). When the channels are activated, UMR
372          * WQEs will redirect the RX WQEs to the actual memory from
373          * the RQ's pool, while the gaps (wqe_overflow) remain mapped
374          * to the default page.
375          */
376         switch (umr_mode) {
377         case MLX5E_MPWRQ_UMR_MODE_OVERSIZED:
378                 klm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
379                 for (i = 0; i < npages; i++) {
380                         klm[i << 1] = (struct mlx5_klm) {
381                                 .va = cpu_to_be64(filler_addr),
382                                 .bcount = cpu_to_be32(xsk_chunk_size),
383                                 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
384                         };
385                         klm[(i << 1) + 1] = (struct mlx5_klm) {
386                                 .va = cpu_to_be64(filler_addr),
387                                 .bcount = cpu_to_be32((1 << page_shift) - xsk_chunk_size),
388                                 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
389                         };
390                 }
391                 break;
392         case MLX5E_MPWRQ_UMR_MODE_UNALIGNED:
393                 ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
394                 for (i = 0; i < npages; i++)
395                         ksm[i] = (struct mlx5_ksm) {
396                                 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
397                                 .va = cpu_to_be64(filler_addr),
398                         };
399                 break;
400         case MLX5E_MPWRQ_UMR_MODE_ALIGNED:
401                 mtt = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
402                 for (i = 0; i < npages; i++)
403                         mtt[i] = (struct mlx5_mtt) {
404                                 .ptag = cpu_to_be64(filler_addr),
405                         };
406                 break;
407         case MLX5E_MPWRQ_UMR_MODE_TRIPLE:
408                 ksm = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
409                 for (i = 0; i < npages * 4; i++) {
410                         ksm[i] = (struct mlx5_ksm) {
411                                 .key = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey),
412                                 .va = cpu_to_be64(filler_addr),
413                         };
414                 }
415                 break;
416         }
417
418         err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
419
420         kvfree(in);
421         return err;
422 }
423
424 static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
425                                      u64 nentries,
426                                      u32 *umr_mkey)
427 {
428         int inlen;
429         void *mkc;
430         u32 *in;
431         int err;
432
433         inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
434
435         in = kvzalloc(inlen, GFP_KERNEL);
436         if (!in)
437                 return -ENOMEM;
438
439         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
440
441         MLX5_SET(mkc, mkc, free, 1);
442         MLX5_SET(mkc, mkc, umr_en, 1);
443         MLX5_SET(mkc, mkc, lw, 1);
444         MLX5_SET(mkc, mkc, lr, 1);
445         MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
446         mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
447         MLX5_SET(mkc, mkc, qpn, 0xffffff);
448         MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
449         MLX5_SET(mkc, mkc, translations_octword_size, nentries);
450         MLX5_SET(mkc, mkc, length64, 1);
451         err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
452
453         kvfree(in);
454         return err;
455 }
456
457 static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq *rq)
458 {
459         u32 xsk_chunk_size = rq->xsk_pool ? rq->xsk_pool->chunk_size : 0;
460         u32 wq_size = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
461         u32 num_entries, max_num_entries;
462         u32 umr_mkey;
463         int err;
464
465         max_num_entries = mlx5e_mpwrq_max_num_entries(mdev, rq->mpwqe.umr_mode);
466
467         /* Shouldn't overflow; the result is at most MLX5E_MAX_RQ_NUM_MTTS. */
468         if (WARN_ON_ONCE(check_mul_overflow(wq_size, (u32)rq->mpwqe.mtts_per_wqe,
469                                             &num_entries) ||
470                          num_entries > max_num_entries))
471                 mlx5_core_err(mdev, "%s: multiplication overflow: %u * %u > %u\n",
472                               __func__, wq_size, rq->mpwqe.mtts_per_wqe,
473                               max_num_entries);
474
475         err = mlx5e_create_umr_mkey(mdev, num_entries, rq->mpwqe.page_shift,
476                                     &umr_mkey, rq->wqe_overflow.addr,
477                                     rq->mpwqe.umr_mode, xsk_chunk_size);
478         rq->mpwqe.umr_mkey_be = cpu_to_be32(umr_mkey);
479         return err;
480 }
481
482 static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
483                                        struct mlx5e_rq *rq)
484 {
485         u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
486
487         if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) {
488                 mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
489                               max_klm_size, rq->mpwqe.shampo->hd_per_wq);
490                 return -EINVAL;
491         }
492         return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
493                                          &rq->mpwqe.shampo->mkey);
494 }
495
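/* Partition WQE fragments into pages: pack consecutive fragments into the
 * same allocation unit until the next stride would cross a page boundary,
 * then move to the next unit and mark the previous fragment as last in page.
 */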
496 static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
497 {
498         struct mlx5e_wqe_frag_info next_frag = {};
499         struct mlx5e_wqe_frag_info *prev = NULL;
500         int i;
501
502         if (rq->xsk_pool) {
503                 /* Assumptions used by XSK batched allocator. */
504                 WARN_ON(rq->wqe.info.num_frags != 1);
505                 WARN_ON(rq->wqe.info.log_num_frags != 0);
506                 WARN_ON(rq->wqe.info.arr[0].frag_stride != PAGE_SIZE);
507         }
508
509         next_frag.au = &rq->wqe.alloc_units[0];
510
511         for (i = 0; i < mlx5_wq_cyc_get_size(&rq->wqe.wq); i++) {
512                 struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
513                 struct mlx5e_wqe_frag_info *frag =
514                         &rq->wqe.frags[i << rq->wqe.info.log_num_frags];
515                 int f;
516
517                 for (f = 0; f < rq->wqe.info.num_frags; f++, frag++) {
518                         if (next_frag.offset + frag_info[f].frag_stride > PAGE_SIZE) {
519                                 next_frag.au++;
520                                 next_frag.offset = 0;
521                                 if (prev)
522                                         prev->last_in_page = true;
523                         }
524                         *frag = next_frag;
525
526                         /* prepare next */
527                         next_frag.offset += frag_info[f].frag_stride;
528                         prev = frag;
529                 }
530         }
531
532         if (prev)
533                 prev->last_in_page = true;
534 }
535
536 static int mlx5e_init_au_list(struct mlx5e_rq *rq, int wq_sz, int node)
537 {
538         int len = wq_sz << rq->wqe.info.log_num_frags;
539
540         rq->wqe.alloc_units = kvzalloc_node(array_size(len, sizeof(*rq->wqe.alloc_units)),
541                                             GFP_KERNEL, node);
542         if (!rq->wqe.alloc_units)
543                 return -ENOMEM;
544
545         mlx5e_init_frags_partition(rq);
546
547         return 0;
548 }
549
550 static void mlx5e_free_au_list(struct mlx5e_rq *rq)
551 {
552         kvfree(rq->wqe.alloc_units);
553 }
554
555 static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
556 {
557         struct mlx5e_rq *rq = container_of(recover_work, struct mlx5e_rq, recover_work);
558
559         mlx5e_reporter_rq_cqe_err(rq);
560 }
561
562 static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
563 {
564         rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
565         if (!rq->wqe_overflow.page)
566                 return -ENOMEM;
567
568         rq->wqe_overflow.addr = dma_map_page(rq->pdev, rq->wqe_overflow.page, 0,
569                                              PAGE_SIZE, rq->buff.map_dir);
570         if (dma_mapping_error(rq->pdev, rq->wqe_overflow.addr)) {
571                 __free_page(rq->wqe_overflow.page);
572                 return -ENOMEM;
573         }
574         return 0;
575 }
576
577 static void mlx5e_free_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
578 {
579         dma_unmap_page(rq->pdev, rq->wqe_overflow.addr, PAGE_SIZE,
580                        rq->buff.map_dir);
581         __free_page(rq->wqe_overflow.page);
582 }
583
584 static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
585                              struct mlx5e_rq *rq)
586 {
587         struct mlx5_core_dev *mdev = c->mdev;
588         int err;
589
590         rq->wq_type      = params->rq_wq_type;
591         rq->pdev         = c->pdev;
592         rq->netdev       = c->netdev;
593         rq->priv         = c->priv;
594         rq->tstamp       = c->tstamp;
595         rq->clock        = &mdev->clock;
596         rq->icosq        = &c->icosq;
597         rq->ix           = c->ix;
598         rq->channel      = c;
599         rq->mdev         = mdev;
600         rq->hw_mtu =
601                 MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN * !params->scatter_fcs_en;
602         rq->xdpsq        = &c->rq_xdpsq;
603         rq->stats        = &c->priv->channel_stats[c->ix]->rq;
604         rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
605         err = mlx5e_rq_set_handlers(rq, params, NULL);
606         if (err)
607                 return err;
608
609         return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, c->napi.napi_id);
610 }
611
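/* Allocate SHAMPO (HW GRO) resources for the RQ: the header descriptor area,
 * its KLM UMR mkey, the per-header info and bitmap, and the hw_gro_data
 * scratch buffer; also grow the page_pool size to cover the header buffers.
 * A no-op unless MLX5E_RQ_STATE_SHAMPO is set.
 */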
612 static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev,
613                                 struct mlx5e_params *params,
614                                 struct mlx5e_rq_param *rqp,
615                                 struct mlx5e_rq *rq,
616                                 u32 *pool_size,
617                                 int node)
618 {
619         void *wqc = MLX5_ADDR_OF(rqc, rqp->rqc, wq);
620         int wq_size;
621         int err;
622
623         if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
624                 return 0;
625         err = mlx5e_rq_shampo_hd_alloc(rq, node);
626         if (err)
627                 goto out;
628         rq->mpwqe.shampo->hd_per_wq =
629                 mlx5e_shampo_hd_per_wq(mdev, params, rqp);
630         err = mlx5e_create_rq_hd_umr_mkey(mdev, rq);
631         if (err)
632                 goto err_shampo_hd;
633         err = mlx5e_rq_shampo_hd_info_alloc(rq, node);
634         if (err)
635                 goto err_shampo_info;
636         rq->hw_gro_data = kvzalloc_node(sizeof(*rq->hw_gro_data), GFP_KERNEL, node);
637         if (!rq->hw_gro_data) {
638                 err = -ENOMEM;
639                 goto err_hw_gro_data;
640         }
641         rq->mpwqe.shampo->key =
642                 cpu_to_be32(rq->mpwqe.shampo->mkey);
643         rq->mpwqe.shampo->hd_per_wqe =
644                 mlx5e_shampo_hd_per_wqe(mdev, params, rqp);
645         wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
646         *pool_size += (rq->mpwqe.shampo->hd_per_wqe * wq_size) /
647                      MLX5E_SHAMPO_WQ_HEADER_PER_PAGE;
648         return 0;
649
650 err_hw_gro_data:
651         mlx5e_rq_shampo_hd_info_free(rq);
652 err_shampo_info:
653         mlx5_core_destroy_mkey(mdev, rq->mpwqe.shampo->mkey);
654 err_shampo_hd:
655         mlx5e_rq_shampo_hd_free(rq);
656 out:
657         return err;
658 }
659
660 static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq)
661 {
662         if (!test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
663                 return;
664
665         kvfree(rq->hw_gro_data);
666         mlx5e_rq_shampo_hd_info_free(rq);
667         mlx5_core_destroy_mkey(rq->mdev, rq->mpwqe.shampo->mkey);
668         mlx5e_rq_shampo_hd_free(rq);
669 }
670
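/* Query the terminate-scatter-list mkey from the device special contexts when
 * the capability is present; otherwise, or on query failure, fall back to the
 * constant MLX5_TERMINATE_SCATTER_LIST_LKEY.
 */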
671 static __be32 mlx5e_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev)
672 {
673         u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
674         u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {};
675         int res;
676
677         if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey))
678                 return MLX5_TERMINATE_SCATTER_LIST_LKEY;
679
680         MLX5_SET(query_special_contexts_in, in, opcode,
681                  MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
682         res = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out);
683         if (res)
684                 return MLX5_TERMINATE_SCATTER_LIST_LKEY;
685
686         res = MLX5_GET(query_special_contexts_out, out,
687                        terminate_scatter_list_mkey);
688         return cpu_to_be32(res);
689 }
690
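/* Allocate host-side RQ resources according to the WQ type: the work queue
 * itself, MPWQE bookkeeping (UMR mkey, per-WQE info, optional SHAMPO) or
 * cyclic-WQ fragment bookkeeping, the page_pool or XSK memory model, and the
 * initial contents of the RX WQEs (buffer addresses, byte counts and keys).
 */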
691 static int mlx5e_alloc_rq(struct mlx5e_params *params,
692                           struct mlx5e_xsk_param *xsk,
693                           struct mlx5e_rq_param *rqp,
694                           int node, struct mlx5e_rq *rq)
695 {
696         struct page_pool_params pp_params = { 0 };
697         struct mlx5_core_dev *mdev = rq->mdev;
698         void *rqc = rqp->rqc;
699         void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
700         u32 pool_size;
701         int wq_sz;
702         int err;
703         int i;
704
705         rqp->wq.db_numa_node = node;
706         INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
707
708         if (params->xdp_prog)
709                 bpf_prog_inc(params->xdp_prog);
710         RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
711
712         rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
713         rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
714         pool_size = 1 << params->log_rq_mtu_frames;
715
716         rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey);
717
718         switch (rq->wq_type) {
719         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
720                 err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
721                                         &rq->wq_ctrl);
722                 if (err)
723                         goto err_rq_xdp_prog;
724
725                 err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
726                 if (err)
727                         goto err_rq_wq_destroy;
728
729                 rq->mpwqe.wq.db = &rq->mpwqe.wq.db[MLX5_RCV_DBR];
730
731                 wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
732
733                 rq->mpwqe.page_shift = mlx5e_mpwrq_page_shift(mdev, xsk);
734                 rq->mpwqe.umr_mode = mlx5e_mpwrq_umr_mode(mdev, xsk);
735                 rq->mpwqe.pages_per_wqe =
736                         mlx5e_mpwrq_pages_per_wqe(mdev, rq->mpwqe.page_shift,
737                                                   rq->mpwqe.umr_mode);
738                 rq->mpwqe.umr_wqebbs =
739                         mlx5e_mpwrq_umr_wqebbs(mdev, rq->mpwqe.page_shift,
740                                                rq->mpwqe.umr_mode);
741                 rq->mpwqe.mtts_per_wqe =
742                         mlx5e_mpwrq_mtts_per_wqe(mdev, rq->mpwqe.page_shift,
743                                                  rq->mpwqe.umr_mode);
744
745                 pool_size = rq->mpwqe.pages_per_wqe <<
746                         mlx5e_mpwqe_get_log_rq_size(mdev, params, xsk);
747
748                 rq->mpwqe.log_stride_sz = mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk);
749                 rq->mpwqe.num_strides =
750                         BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
751                 rq->mpwqe.min_wqe_bulk = mlx5e_mpwqe_get_min_wqe_bulk(wq_sz);
752
753                 rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
754
755                 err = mlx5e_create_rq_umr_mkey(mdev, rq);
756                 if (err)
757                         goto err_rq_drop_page;
758
759                 err = mlx5e_rq_alloc_mpwqe_info(rq, node);
760                 if (err)
761                         goto err_rq_mkey;
762
763                 err = mlx5_rq_shampo_alloc(mdev, params, rqp, rq, &pool_size, node);
764                 if (err)
765                         goto err_free_mpwqe_info;
766
767                 break;
768         default: /* MLX5_WQ_TYPE_CYCLIC */
769                 err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
770                                          &rq->wq_ctrl);
771                 if (err)
772                         goto err_rq_xdp_prog;
773
774                 rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
775
776                 wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
777
778                 rq->wqe.info = rqp->frags_info;
779                 rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
780
781                 rq->wqe.frags =
782                         kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
783                                         (wq_sz << rq->wqe.info.log_num_frags)),
784                                       GFP_KERNEL, node);
785                 if (!rq->wqe.frags) {
786                         err = -ENOMEM;
787                         goto err_rq_wq_destroy;
788                 }
789
790                 err = mlx5e_init_au_list(rq, wq_sz, node);
791                 if (err)
792                         goto err_rq_frags;
793         }
794
795         if (xsk) {
796                 err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
797                                                  MEM_TYPE_XSK_BUFF_POOL, NULL);
798                 xsk_pool_set_rxq_info(rq->xsk_pool, &rq->xdp_rxq);
799         } else {
800                 /* Create a page_pool and register it with rxq */
801                 pp_params.order     = 0;
802                 pp_params.flags     = 0; /* No internal DMA mapping in page_pool */
803                 pp_params.pool_size = pool_size;
804                 pp_params.nid       = node;
805                 pp_params.dev       = rq->pdev;
806                 pp_params.dma_dir   = rq->buff.map_dir;
807
808                 /* page_pool can be used even when there is no rq->xdp_prog:
809                  * since page_pool is not handling DMA mapping, there is no
810                  * state that needs to be cleared, and page_pool gracefully
811                  * handles elevated refcounts.
812                  */
813                 rq->page_pool = page_pool_create(&pp_params);
814                 if (IS_ERR(rq->page_pool)) {
815                         err = PTR_ERR(rq->page_pool);
816                         rq->page_pool = NULL;
817                         goto err_free_by_rq_type;
818                 }
819                 if (xdp_rxq_info_is_reg(&rq->xdp_rxq))
820                         err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
821                                                          MEM_TYPE_PAGE_POOL, rq->page_pool);
822         }
823         if (err)
824                 goto err_destroy_page_pool;
825
826         for (i = 0; i < wq_sz; i++) {
827                 if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
828                         struct mlx5e_rx_wqe_ll *wqe =
829                                 mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
830                         u32 byte_count =
831                                 rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
832                         u64 dma_offset = mul_u32_u32(i, rq->mpwqe.mtts_per_wqe) <<
833                                 rq->mpwqe.page_shift;
834                         u16 headroom = test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state) ?
835                                        0 : rq->buff.headroom;
836
837                         wqe->data[0].addr = cpu_to_be64(dma_offset + headroom);
838                         wqe->data[0].byte_count = cpu_to_be32(byte_count);
839                         wqe->data[0].lkey = rq->mpwqe.umr_mkey_be;
840                 } else {
841                         struct mlx5e_rx_wqe_cyc *wqe =
842                                 mlx5_wq_cyc_get_wqe(&rq->wqe.wq, i);
843                         int f;
844
845                         for (f = 0; f < rq->wqe.info.num_frags; f++) {
846                                 u32 frag_size = rq->wqe.info.arr[f].frag_size |
847                                         MLX5_HW_START_PADDING;
848
849                                 wqe->data[f].byte_count = cpu_to_be32(frag_size);
850                                 wqe->data[f].lkey = rq->mkey_be;
851                         }
852                         /* check if num_frags is not a power of two */
853                         if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) {
854                                 wqe->data[f].byte_count = 0;
855                                 wqe->data[f].lkey = mlx5e_get_terminate_scatter_list_mkey(mdev);
856                                 wqe->data[f].addr = 0;
857                         }
858                 }
859         }
860
861         INIT_WORK(&rq->dim.work, mlx5e_rx_dim_work);
862
863         switch (params->rx_cq_moderation.cq_period_mode) {
864         case MLX5_CQ_PERIOD_MODE_START_FROM_CQE:
865                 rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE;
866                 break;
867         case MLX5_CQ_PERIOD_MODE_START_FROM_EQE:
868         default:
869                 rq->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
870         }
871
872         rq->page_cache.head = 0;
873         rq->page_cache.tail = 0;
874
875         return 0;
876
877 err_destroy_page_pool:
878         page_pool_destroy(rq->page_pool);
879 err_free_by_rq_type:
880         switch (rq->wq_type) {
881         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
882                 mlx5e_rq_free_shampo(rq);
883 err_free_mpwqe_info:
884                 kvfree(rq->mpwqe.info);
885 err_rq_mkey:
886                 mlx5_core_destroy_mkey(mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be));
887 err_rq_drop_page:
888                 mlx5e_free_mpwqe_rq_drop_page(rq);
889                 break;
890         default: /* MLX5_WQ_TYPE_CYCLIC */
891                 mlx5e_free_au_list(rq);
892 err_rq_frags:
893                 kvfree(rq->wqe.frags);
894         }
895 err_rq_wq_destroy:
896         mlx5_wq_destroy(&rq->wq_ctrl);
897 err_rq_xdp_prog:
898         if (params->xdp_prog)
899                 bpf_prog_put(params->xdp_prog);
900
901         return err;
902 }
903
904 static void mlx5e_free_rq(struct mlx5e_rq *rq)
905 {
906         struct bpf_prog *old_prog;
907         int i;
908
909         if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
910                 old_prog = rcu_dereference_protected(rq->xdp_prog,
911                                                      lockdep_is_held(&rq->priv->state_lock));
912                 if (old_prog)
913                         bpf_prog_put(old_prog);
914         }
915
916         switch (rq->wq_type) {
917         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
918                 kvfree(rq->mpwqe.info);
919                 mlx5_core_destroy_mkey(rq->mdev, be32_to_cpu(rq->mpwqe.umr_mkey_be));
920                 mlx5e_free_mpwqe_rq_drop_page(rq);
921                 mlx5e_rq_free_shampo(rq);
922                 break;
923         default: /* MLX5_WQ_TYPE_CYCLIC */
924                 kvfree(rq->wqe.frags);
925                 mlx5e_free_au_list(rq);
926         }
927
928         for (i = rq->page_cache.head; i != rq->page_cache.tail;
929              i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) {
930                 /* With AF_XDP, page_cache is not used, so this loop is not
931                  * entered, and it's safe to call mlx5e_page_release_dynamic
932                  * directly.
933                  */
934                 mlx5e_page_release_dynamic(rq, rq->page_cache.page_cache[i], false);
935         }
936
937         xdp_rxq_info_unreg(&rq->xdp_rxq);
938         page_pool_destroy(rq->page_pool);
939         mlx5_wq_destroy(&rq->wq_ctrl);
940 }
941
942 int mlx5e_create_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param)
943 {
944         struct mlx5_core_dev *mdev = rq->mdev;
945         u8 ts_format;
946         void *in;
947         void *rqc;
948         void *wq;
949         int inlen;
950         int err;
951
952         inlen = MLX5_ST_SZ_BYTES(create_rq_in) +
953                 sizeof(u64) * rq->wq_ctrl.buf.npages;
954         in = kvzalloc(inlen, GFP_KERNEL);
955         if (!in)
956                 return -ENOMEM;
957
958         ts_format = mlx5_is_real_time_rq(mdev) ?
959                             MLX5_TIMESTAMP_FORMAT_REAL_TIME :
960                             MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
961         rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
962         wq  = MLX5_ADDR_OF(rqc, rqc, wq);
963
964         memcpy(rqc, param->rqc, sizeof(param->rqc));
965
966         MLX5_SET(rqc,  rqc, cqn,                rq->cq.mcq.cqn);
967         MLX5_SET(rqc,  rqc, state,              MLX5_RQC_STATE_RST);
968         MLX5_SET(rqc,  rqc, ts_format,          ts_format);
969         MLX5_SET(wq,   wq,  log_wq_pg_sz,       rq->wq_ctrl.buf.page_shift -
970                                                 MLX5_ADAPTER_PAGE_SHIFT);
971         MLX5_SET64(wq, wq,  dbr_addr,           rq->wq_ctrl.db.dma);
972
973         if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
974                 MLX5_SET(wq, wq, log_headers_buffer_entry_num,
975                          order_base_2(rq->mpwqe.shampo->hd_per_wq));
976                 MLX5_SET(wq, wq, headers_mkey, rq->mpwqe.shampo->mkey);
977         }
978
979         mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
980                                   (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
981
982         err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
983
984         kvfree(in);
985
986         return err;
987 }
988
989 static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
990 {
991         struct mlx5_core_dev *mdev = rq->mdev;
992
993         void *in;
994         void *rqc;
995         int inlen;
996         int err;
997
998         inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
999         in = kvzalloc(inlen, GFP_KERNEL);
1000         if (!in)
1001                 return -ENOMEM;
1002
1003         if (curr_state == MLX5_RQC_STATE_RST && next_state == MLX5_RQC_STATE_RDY)
1004                 mlx5e_rqwq_reset(rq);
1005
1006         rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
1007
1008         MLX5_SET(modify_rq_in, in, rq_state, curr_state);
1009         MLX5_SET(rqc, rqc, state, next_state);
1010
1011         err = mlx5_core_modify_rq(mdev, rq->rqn, in);
1012
1013         kvfree(in);
1014
1015         return err;
1016 }
1017
1018 static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
1019 {
1020         struct net_device *dev = rq->netdev;
1021         int err;
1022
1023         err = mlx5e_modify_rq_state(rq, curr_state, MLX5_RQC_STATE_RST);
1024         if (err) {
1025                 netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
1026                 return err;
1027         }
1028         err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
1029         if (err) {
1030                 netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
1031                 return err;
1032         }
1033
1034         return 0;
1035 }
1036
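/* Release all posted RX descriptors and cycle the RQ back through RST to RDY
 * so that it can be refilled from a clean state.
 */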
1037 int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
1038 {
1039         mlx5e_free_rx_descs(rq);
1040
1041         return mlx5e_rq_to_ready(rq, curr_state);
1042 }
1043
1044 static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
1045 {
1046         struct mlx5_core_dev *mdev = rq->mdev;
1047         void *in;
1048         void *rqc;
1049         int inlen;
1050         int err;
1051
1052         inlen = MLX5_ST_SZ_BYTES(modify_rq_in);
1053         in = kvzalloc(inlen, GFP_KERNEL);
1054         if (!in)
1055                 return -ENOMEM;
1056
1057         rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
1058
1059         MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY);
1060         MLX5_SET64(modify_rq_in, in, modify_bitmask,
1061                    MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
1062         MLX5_SET(rqc, rqc, vsd, vsd);
1063         MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
1064
1065         err = mlx5_core_modify_rq(mdev, rq->rqn, in);
1066
1067         kvfree(in);
1068
1069         return err;
1070 }
1071
1072 void mlx5e_destroy_rq(struct mlx5e_rq *rq)
1073 {
1074         mlx5_core_destroy_rq(rq->mdev, rq->rqn);
1075 }
1076
1077 int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
1078 {
1079         unsigned long exp_time = jiffies + msecs_to_jiffies(wait_time);
1080
1081         u16 min_wqes = mlx5_min_rx_wqes(rq->wq_type, mlx5e_rqwq_get_size(rq));
1082
1083         do {
1084                 if (mlx5e_rqwq_get_cur_sz(rq) >= min_wqes)
1085                         return 0;
1086
1087                 msleep(20);
1088         } while (time_before(jiffies, exp_time));
1089
1090         netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
1091                     rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
1092
1093         mlx5e_reporter_rx_timeout(rq);
1094         return -ETIMEDOUT;
1095 }
1096
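/* For striding RQ only: deallocate the WQEs whose UMRs are still in flight
 * (starting at wq->head), release SHAMPO headers that were posted but not
 * completed, and reset the in-progress UMR counters.
 */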
1097 void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq)
1098 {
1099         struct mlx5_wq_ll *wq;
1100         u16 head;
1101         int i;
1102
1103         if (rq->wq_type != MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
1104                 return;
1105
1106         wq = &rq->mpwqe.wq;
1107         head = wq->head;
1108
1109         /* Outstanding UMR WQEs (in progress) start at wq->head */
1110         for (i = 0; i < rq->mpwqe.umr_in_progress; i++) {
1111                 rq->dealloc_wqe(rq, head);
1112                 head = mlx5_wq_ll_get_wqe_next_ix(wq, head);
1113         }
1114
1115         if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) {
1116                 u16 len;
1117
1118                 len = (rq->mpwqe.shampo->pi - rq->mpwqe.shampo->ci) &
1119                       (rq->mpwqe.shampo->hd_per_wq - 1);
1120                 mlx5e_shampo_dealloc_hd(rq, len, rq->mpwqe.shampo->ci, false);
1121                 rq->mpwqe.shampo->pi = rq->mpwqe.shampo->ci;
1122         }
1123
1124         rq->mpwqe.actual_wq_head = wq->head;
1125         rq->mpwqe.umr_in_progress = 0;
1126         rq->mpwqe.umr_completed = 0;
1127 }
1128
1129 void mlx5e_free_rx_descs(struct mlx5e_rq *rq)
1130 {
1131         __be16 wqe_ix_be;
1132         u16 wqe_ix;
1133
1134         if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
1135                 struct mlx5_wq_ll *wq = &rq->mpwqe.wq;
1136
1137                 mlx5e_free_rx_in_progress_descs(rq);
1138
1139                 while (!mlx5_wq_ll_is_empty(wq)) {
1140                         struct mlx5e_rx_wqe_ll *wqe;
1141
1142                         wqe_ix_be = *wq->tail_next;
1143                         wqe_ix    = be16_to_cpu(wqe_ix_be);
1144                         wqe       = mlx5_wq_ll_get_wqe(wq, wqe_ix);
1145                         rq->dealloc_wqe(rq, wqe_ix);
1146                         mlx5_wq_ll_pop(wq, wqe_ix_be,
1147                                        &wqe->next.next_wqe_index);
1148                 }
1149
1150                 if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state))
1151                         mlx5e_shampo_dealloc_hd(rq, rq->mpwqe.shampo->hd_per_wq,
1152                                                 0, true);
1153         } else {
1154                 struct mlx5_wq_cyc *wq = &rq->wqe.wq;
1155
1156                 while (!mlx5_wq_cyc_is_empty(wq)) {
1157                         wqe_ix = mlx5_wq_cyc_get_tail(wq);
1158                         rq->dealloc_wqe(rq, wqe_ix);
1159                         mlx5_wq_cyc_pop(wq);
1160                 }
1161         }
1163 }
1164
1165 int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
1166                   struct mlx5e_xsk_param *xsk, int node,
1167                   struct mlx5e_rq *rq)
1168 {
1169         struct mlx5_core_dev *mdev = rq->mdev;
1170         int err;
1171
1172         if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO)
1173                 __set_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state);
1174
1175         err = mlx5e_alloc_rq(params, xsk, param, node, rq);
1176         if (err)
1177                 return err;
1178
1179         err = mlx5e_create_rq(rq, param);
1180         if (err)
1181                 goto err_free_rq;
1182
1183         err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
1184         if (err)
1185                 goto err_destroy_rq;
1186
1187         if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
1188                 __set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
1189
1190         if (params->rx_dim_enabled)
1191                 __set_bit(MLX5E_RQ_STATE_AM, &rq->state);
1192
1193         /* We disable csum_complete when XDP is enabled since
1194          * XDP programs might manipulate packets which will render
1195          * skb->checksum incorrect.
1196          */
1197         if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog)
1198                 __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state);
1199
1200         /* For CQE compression on striding RQ, use stride index provided by
1201          * HW if capability is supported.
1202          */
1203         if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) &&
1204             MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index))
1205                 __set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);
1206
1207         /* For enhanced CQE compression packet processing, decompress the
1208          * session according to the enhanced layout.
1209          */
1210         if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) &&
1211             MLX5_CAP_GEN(mdev, enhanced_cqe_compression))
1212                 __set_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state);
1213
1214         return 0;
1215
1216 err_destroy_rq:
1217         mlx5e_destroy_rq(rq);
1218 err_free_rq:
1219         mlx5e_free_rq(rq);
1220
1221         return err;
1222 }
1223
1224 void mlx5e_activate_rq(struct mlx5e_rq *rq)
1225 {
1226         set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
1227 }
1228
1229 void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
1230 {
1231         clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
1232         synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
1233 }
1234
1235 void mlx5e_close_rq(struct mlx5e_rq *rq)
1236 {
1237         cancel_work_sync(&rq->dim.work);
1238         cancel_work_sync(&rq->recover_work);
1239         mlx5e_destroy_rq(rq);
1240         mlx5e_free_rx_descs(rq);
1241         mlx5e_free_rq(rq);
1242 }
1243
1244 static void mlx5e_free_xdpsq_db(struct mlx5e_xdpsq *sq)
1245 {
1246         kvfree(sq->db.xdpi_fifo.xi);
1247         kvfree(sq->db.wqe_info);
1248 }
1249
1250 static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
1251 {
1252         struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
1253         int wq_sz        = mlx5_wq_cyc_get_size(&sq->wq);
1254         int dsegs_per_wq = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
1255         size_t size;
1256
1257         size = array_size(sizeof(*xdpi_fifo->xi), dsegs_per_wq);
1258         xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
1259         if (!xdpi_fifo->xi)
1260                 return -ENOMEM;
1261
1262         xdpi_fifo->pc   = &sq->xdpi_fifo_pc;
1263         xdpi_fifo->cc   = &sq->xdpi_fifo_cc;
1264         xdpi_fifo->mask = dsegs_per_wq - 1;
1265
1266         return 0;
1267 }
1268
1269 static int mlx5e_alloc_xdpsq_db(struct mlx5e_xdpsq *sq, int numa)
1270 {
1271         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1272         size_t size;
1273         int err;
1274
1275         size = array_size(sizeof(*sq->db.wqe_info), wq_sz);
1276         sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
1277         if (!sq->db.wqe_info)
1278                 return -ENOMEM;
1279
1280         err = mlx5e_alloc_xdpsq_fifo(sq, numa);
1281         if (err) {
1282                 mlx5e_free_xdpsq_db(sq);
1283                 return err;
1284         }
1285
1286         return 0;
1287 }
1288
1289 static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
1290                              struct mlx5e_params *params,
1291                              struct xsk_buff_pool *xsk_pool,
1292                              struct mlx5e_sq_param *param,
1293                              struct mlx5e_xdpsq *sq,
1294                              bool is_redirect)
1295 {
1296         void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
1297         struct mlx5_core_dev *mdev = c->mdev;
1298         struct mlx5_wq_cyc *wq = &sq->wq;
1299         int err;
1300
1301         sq->pdev      = c->pdev;
1302         sq->mkey_be   = c->mkey_be;
1303         sq->channel   = c;
1304         sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
1305         sq->min_inline_mode = params->tx_min_inline_mode;
1306         sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
1307         sq->xsk_pool  = xsk_pool;
1308
1309         sq->stats = sq->xsk_pool ?
1310                 &c->priv->channel_stats[c->ix]->xsksq :
1311                 is_redirect ?
1312                         &c->priv->channel_stats[c->ix]->xdpsq :
1313                         &c->priv->channel_stats[c->ix]->rq_xdpsq;
1314         sq->stop_room = param->is_mpw ? mlx5e_stop_room_for_mpwqe(mdev) :
1315                                         mlx5e_stop_room_for_max_wqe(mdev);
1316         sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
1317
1318         param->wq.db_numa_node = cpu_to_node(c->cpu);
1319         err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
1320         if (err)
1321                 return err;
1322         wq->db = &wq->db[MLX5_SND_DBR];
1323
1324         err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
1325         if (err)
1326                 goto err_sq_wq_destroy;
1327
1328         return 0;
1329
1330 err_sq_wq_destroy:
1331         mlx5_wq_destroy(&sq->wq_ctrl);
1332
1333         return err;
1334 }
1335
1336 static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
1337 {
1338         mlx5e_free_xdpsq_db(sq);
1339         mlx5_wq_destroy(&sq->wq_ctrl);
1340 }
1341
1342 static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
1343 {
1344         kvfree(sq->db.wqe_info);
1345 }
1346
1347 static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
1348 {
1349         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1350         size_t size;
1351
1352         size = array_size(wq_sz, sizeof(*sq->db.wqe_info));
1353         sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
1354         if (!sq->db.wqe_info)
1355                 return -ENOMEM;
1356
1357         return 0;
1358 }
1359
1360 static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work)
1361 {
1362         struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
1363                                               recover_work);
1364
1365         mlx5e_reporter_icosq_cqe_err(sq);
1366 }
1367
1368 static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work)
1369 {
1370         struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq,
1371                                               recover_work);
1372
1373         /* Not implemented yet. */
1374
1375         netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n");
1376 }
1377
1378 static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
1379                              struct mlx5e_sq_param *param,
1380                              struct mlx5e_icosq *sq,
1381                              work_func_t recover_work_func)
1382 {
1383         void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
1384         struct mlx5_core_dev *mdev = c->mdev;
1385         struct mlx5_wq_cyc *wq = &sq->wq;
1386         int err;
1387
1388         sq->channel   = c;
1389         sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
1390         sq->reserved_room = param->stop_room;
1391
1392         param->wq.db_numa_node = cpu_to_node(c->cpu);
1393         err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
1394         if (err)
1395                 return err;
1396         wq->db = &wq->db[MLX5_SND_DBR];
1397
1398         err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
1399         if (err)
1400                 goto err_sq_wq_destroy;
1401
1402         INIT_WORK(&sq->recover_work, recover_work_func);
1403
1404         return 0;
1405
1406 err_sq_wq_destroy:
1407         mlx5_wq_destroy(&sq->wq_ctrl);
1408
1409         return err;
1410 }
1411
1412 static void mlx5e_free_icosq(struct mlx5e_icosq *sq)
1413 {
1414         mlx5e_free_icosq_db(sq);
1415         mlx5_wq_destroy(&sq->wq_ctrl);
1416 }
1417
1418 void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
1419 {
1420         kvfree(sq->db.wqe_info);
1421         kvfree(sq->db.skb_fifo.fifo);
1422         kvfree(sq->db.dma_fifo);
1423 }
1424
1425 int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
1426 {
1427         int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
1428         int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS;
1429
1430         sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
1431                                                    sizeof(*sq->db.dma_fifo)),
1432                                         GFP_KERNEL, numa);
1433         sq->db.skb_fifo.fifo = kvzalloc_node(array_size(df_sz,
1434                                                         sizeof(*sq->db.skb_fifo.fifo)),
1435                                         GFP_KERNEL, numa);
1436         sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
1437                                                    sizeof(*sq->db.wqe_info)),
1438                                         GFP_KERNEL, numa);
1439         if (!sq->db.dma_fifo || !sq->db.skb_fifo.fifo || !sq->db.wqe_info) {
1440                 mlx5e_free_txqsq_db(sq);
1441                 return -ENOMEM;
1442         }
1443
1444         sq->dma_fifo_mask = df_sz - 1;
1445
1446         sq->db.skb_fifo.pc   = &sq->skb_fifo_pc;
1447         sq->db.skb_fifo.cc   = &sq->skb_fifo_cc;
1448         sq->db.skb_fifo.mask = df_sz - 1;
1449
1450         return 0;
1451 }
1452
1453 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
1454                              int txq_ix,
1455                              struct mlx5e_params *params,
1456                              struct mlx5e_sq_param *param,
1457                              struct mlx5e_txqsq *sq,
1458                              int tc)
1459 {
1460         void *sqc_wq               = MLX5_ADDR_OF(sqc, param->sqc, wq);
1461         struct mlx5_core_dev *mdev = c->mdev;
1462         struct mlx5_wq_cyc *wq = &sq->wq;
1463         int err;
1464
1465         sq->pdev      = c->pdev;
1466         sq->clock     = &mdev->clock;
1467         sq->mkey_be   = c->mkey_be;
1468         sq->netdev    = c->netdev;
1469         sq->mdev      = c->mdev;
1470         sq->channel   = c;
1471         sq->priv      = c->priv;
1472         sq->ch_ix     = c->ix;
1473         sq->txq_ix    = txq_ix;
1474         sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
1475         sq->min_inline_mode = params->tx_min_inline_mode;
1476         sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
1477         sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev);
1478         INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
1479         if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
1480                 set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
1481         if (mlx5_ipsec_device_caps(c->priv->mdev))
1482                 set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
1483         if (param->is_mpw)
1484                 set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
1485         sq->stop_room = param->stop_room;
1486         sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
1487
1488         param->wq.db_numa_node = cpu_to_node(c->cpu);
1489         err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
1490         if (err)
1491                 return err;
1492         wq->db    = &wq->db[MLX5_SND_DBR];
1493
1494         err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
1495         if (err)
1496                 goto err_sq_wq_destroy;
1497
1498         INIT_WORK(&sq->dim.work, mlx5e_tx_dim_work);
1499         sq->dim.mode = params->tx_cq_moderation.cq_period_mode;
1500
1501         return 0;
1502
1503 err_sq_wq_destroy:
1504         mlx5_wq_destroy(&sq->wq_ctrl);
1505
1506         return err;
1507 }
1508
1509 void mlx5e_free_txqsq(struct mlx5e_txqsq *sq)
1510 {
1511         mlx5e_free_txqsq_db(sq);
1512         mlx5_wq_destroy(&sq->wq_ctrl);
1513 }
1514
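     /* The CREATE_SQ mailbox below is variable length: the fixed create_sq_in
      * layout is followed by one 64-bit PAS (physical address) entry per page
      * of the WQ buffer, hence the extra "sizeof(u64) * npages" in inlen and
      * the mlx5_fill_page_frag_array() call that fills in those entries.
      */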
1515 static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
1516                            struct mlx5e_sq_param *param,
1517                            struct mlx5e_create_sq_param *csp,
1518                            u32 *sqn)
1519 {
1520         u8 ts_format;
1521         void *in;
1522         void *sqc;
1523         void *wq;
1524         int inlen;
1525         int err;
1526
1527         inlen = MLX5_ST_SZ_BYTES(create_sq_in) +
1528                 sizeof(u64) * csp->wq_ctrl->buf.npages;
1529         in = kvzalloc(inlen, GFP_KERNEL);
1530         if (!in)
1531                 return -ENOMEM;
1532
1533         ts_format = mlx5_is_real_time_sq(mdev) ?
1534                             MLX5_TIMESTAMP_FORMAT_REAL_TIME :
1535                             MLX5_TIMESTAMP_FORMAT_FREE_RUNNING;
1536         sqc = MLX5_ADDR_OF(create_sq_in, in, ctx);
1537         wq = MLX5_ADDR_OF(sqc, sqc, wq);
1538
1539         memcpy(sqc, param->sqc, sizeof(param->sqc));
1540         MLX5_SET(sqc,  sqc, tis_lst_sz, csp->tis_lst_sz);
1541         MLX5_SET(sqc,  sqc, tis_num_0, csp->tisn);
1542         MLX5_SET(sqc,  sqc, cqn, csp->cqn);
1543         MLX5_SET(sqc,  sqc, ts_cqe_to_dest_cqn, csp->ts_cqe_to_dest_cqn);
1544         MLX5_SET(sqc,  sqc, ts_format, ts_format);
1545
1546
1547         if (MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
1548                 MLX5_SET(sqc,  sqc, min_wqe_inline_mode, csp->min_inline_mode);
1549
1550         MLX5_SET(sqc,  sqc, state, MLX5_SQC_STATE_RST);
1551         MLX5_SET(sqc,  sqc, flush_in_error_en, 1);
1552
1553         MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
1554         MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.hw_objs.bfreg.index);
1555         MLX5_SET(wq,   wq, log_wq_pg_sz,  csp->wq_ctrl->buf.page_shift -
1556                                           MLX5_ADAPTER_PAGE_SHIFT);
1557         MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);
1558
1559         mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
1560                                   (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
1561
1562         err = mlx5_core_create_sq(mdev, in, inlen, sqn);
1563
1564         kvfree(in);
1565
1566         return err;
1567 }
1568
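     /* MODIFY_SQ moves the SQ through its states (e.g. RST -> RDY, as done by
      * mlx5e_create_sq_rdy() below).  In modify_bitmask, bit 0 selects the
      * packet-pacing rate limit index update and bit 2 the QoS queue group
      * update, matching the two optional blocks in this function.  A minimal
      * caller sketch (values purely illustrative):
      *
      *     struct mlx5e_modify_sq_param msp = {
      *             .curr_state = MLX5_SQC_STATE_RST,
      *             .next_state = MLX5_SQC_STATE_RDY,
      *     };
      *     err = mlx5e_modify_sq(mdev, sqn, &msp);
      */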
1569 int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
1570                     struct mlx5e_modify_sq_param *p)
1571 {
1572         u64 bitmask = 0;
1573         void *in;
1574         void *sqc;
1575         int inlen;
1576         int err;
1577
1578         inlen = MLX5_ST_SZ_BYTES(modify_sq_in);
1579         in = kvzalloc(inlen, GFP_KERNEL);
1580         if (!in)
1581                 return -ENOMEM;
1582
1583         sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx);
1584
1585         MLX5_SET(modify_sq_in, in, sq_state, p->curr_state);
1586         MLX5_SET(sqc, sqc, state, p->next_state);
1587         if (p->rl_update && p->next_state == MLX5_SQC_STATE_RDY) {
1588                 bitmask |= 1;
1589                 MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, p->rl_index);
1590         }
1591         if (p->qos_update && p->next_state == MLX5_SQC_STATE_RDY) {
1592                 bitmask |= 1 << 2;
1593                 MLX5_SET(sqc, sqc, qos_queue_group_id, p->qos_queue_group_id);
1594         }
1595         MLX5_SET64(modify_sq_in, in, modify_bitmask, bitmask);
1596
1597         err = mlx5_core_modify_sq(mdev, sqn, in);
1598
1599         kvfree(in);
1600
1601         return err;
1602 }
1603
1604 static void mlx5e_destroy_sq(struct mlx5_core_dev *mdev, u32 sqn)
1605 {
1606         mlx5_core_destroy_sq(mdev, sqn);
1607 }
1608
1609 int mlx5e_create_sq_rdy(struct mlx5_core_dev *mdev,
1610                         struct mlx5e_sq_param *param,
1611                         struct mlx5e_create_sq_param *csp,
1612                         u16 qos_queue_group_id,
1613                         u32 *sqn)
1614 {
1615         struct mlx5e_modify_sq_param msp = {0};
1616         int err;
1617
1618         err = mlx5e_create_sq(mdev, param, csp, sqn);
1619         if (err)
1620                 return err;
1621
1622         msp.curr_state = MLX5_SQC_STATE_RST;
1623         msp.next_state = MLX5_SQC_STATE_RDY;
1624         if (qos_queue_group_id) {
1625                 msp.qos_update = true;
1626                 msp.qos_queue_group_id = qos_queue_group_id;
1627         }
1628         err = mlx5e_modify_sq(mdev, *sqn, &msp);
1629         if (err)
1630                 mlx5e_destroy_sq(mdev, *sqn);
1631
1632         return err;
1633 }
1634
1635 static int mlx5e_set_sq_maxrate(struct net_device *dev,
1636                                 struct mlx5e_txqsq *sq, u32 rate);
1637
1638 int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix,
1639                      struct mlx5e_params *params, struct mlx5e_sq_param *param,
1640                      struct mlx5e_txqsq *sq, int tc, u16 qos_queue_group_id,
1641                      struct mlx5e_sq_stats *sq_stats)
1642 {
1643         struct mlx5e_create_sq_param csp = {};
1644         u32 tx_rate;
1645         int err;
1646
1647         err = mlx5e_alloc_txqsq(c, txq_ix, params, param, sq, tc);
1648         if (err)
1649                 return err;
1650
1651         sq->stats = sq_stats;
1652
1653         csp.tisn            = tisn;
1654         csp.tis_lst_sz      = 1;
1655         csp.cqn             = sq->cq.mcq.cqn;
1656         csp.wq_ctrl         = &sq->wq_ctrl;
1657         csp.min_inline_mode = sq->min_inline_mode;
1658         err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn);
1659         if (err)
1660                 goto err_free_txqsq;
1661
1662         tx_rate = c->priv->tx_rates[sq->txq_ix];
1663         if (tx_rate)
1664                 mlx5e_set_sq_maxrate(c->netdev, sq, tx_rate);
1665
1666         if (params->tx_dim_enabled)
1667                 sq->state |= BIT(MLX5E_SQ_STATE_AM);
1668
1669         return 0;
1670
1671 err_free_txqsq:
1672         mlx5e_free_txqsq(sq);
1673
1674         return err;
1675 }
1676
1677 void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq)
1678 {
1679         sq->txq = netdev_get_tx_queue(sq->netdev, sq->txq_ix);
1680         set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1681         netdev_tx_reset_queue(sq->txq);
1682         netif_tx_start_queue(sq->txq);
1683 }
1684
1685 void mlx5e_tx_disable_queue(struct netdev_queue *txq)
1686 {
1687         __netif_tx_lock_bh(txq);
1688         netif_tx_stop_queue(txq);
1689         __netif_tx_unlock_bh(txq);
1690 }
1691
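     /* Teardown order matters: clear ENABLED so NAPI stops waking the queue,
      * synchronize so no poller still observes the old state, stop the netdev
      * TX queue, and finally post a NOP so the last doorbell pushes out any
      * WQEs that were already written to the ring before the SQ is torn down.
      */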
1692 void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
1693 {
1694         struct mlx5_wq_cyc *wq = &sq->wq;
1695
1696         clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1697         synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */
1698
1699         mlx5e_tx_disable_queue(sq->txq);
1700
1701         /* last doorbell out, godspeed .. */
1702         if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
1703                 u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
1704                 struct mlx5e_tx_wqe *nop;
1705
1706                 sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) {
1707                         .num_wqebbs = 1,
1708                 };
1709
1710                 nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
1711                 mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
1712         }
1713 }
1714
1715 void mlx5e_close_txqsq(struct mlx5e_txqsq *sq)
1716 {
1717         struct mlx5_core_dev *mdev = sq->mdev;
1718         struct mlx5_rate_limit rl = {0};
1719
1720         cancel_work_sync(&sq->dim.work);
1721         cancel_work_sync(&sq->recover_work);
1722         mlx5e_destroy_sq(mdev, sq->sqn);
1723         if (sq->rate_limit) {
1724                 rl.rate = sq->rate_limit;
1725                 mlx5_rl_remove_rate(mdev, &rl);
1726         }
1727         mlx5e_free_txqsq_descs(sq);
1728         mlx5e_free_txqsq(sq);
1729 }
1730
1731 void mlx5e_tx_err_cqe_work(struct work_struct *recover_work)
1732 {
1733         struct mlx5e_txqsq *sq = container_of(recover_work, struct mlx5e_txqsq,
1734                                               recover_work);
1735
1736         mlx5e_reporter_tx_err_cqe(sq);
1737 }
1738
1739 static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params,
1740                             struct mlx5e_sq_param *param, struct mlx5e_icosq *sq,
1741                             work_func_t recover_work_func)
1742 {
1743         struct mlx5e_create_sq_param csp = {};
1744         int err;
1745
1746         err = mlx5e_alloc_icosq(c, param, sq, recover_work_func);
1747         if (err)
1748                 return err;
1749
1750         csp.cqn             = sq->cq.mcq.cqn;
1751         csp.wq_ctrl         = &sq->wq_ctrl;
1752         csp.min_inline_mode = params->tx_min_inline_mode;
1753         err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
1754         if (err)
1755                 goto err_free_icosq;
1756
1757         if (param->is_tls) {
1758                 sq->ktls_resync = mlx5e_ktls_rx_resync_create_resp_list();
1759                 if (IS_ERR(sq->ktls_resync)) {
1760                         err = PTR_ERR(sq->ktls_resync);
1761                         goto err_destroy_icosq;
1762                 }
1763         }
1764         return 0;
1765
1766 err_destroy_icosq:
1767         mlx5e_destroy_sq(c->mdev, sq->sqn);
1768 err_free_icosq:
1769         mlx5e_free_icosq(sq);
1770
1771         return err;
1772 }
1773
1774 void mlx5e_activate_icosq(struct mlx5e_icosq *icosq)
1775 {
1776         set_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
1777 }
1778
1779 void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
1780 {
1781         clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
1782         synchronize_net(); /* Sync with NAPI. */
1783 }
1784
1785 static void mlx5e_close_icosq(struct mlx5e_icosq *sq)
1786 {
1787         struct mlx5e_channel *c = sq->channel;
1788
1789         if (sq->ktls_resync)
1790                 mlx5e_ktls_rx_resync_destroy_resp_list(sq->ktls_resync);
1791         mlx5e_destroy_sq(c->mdev, sq->sqn);
1792         mlx5e_free_icosq_descs(sq);
1793         mlx5e_free_icosq(sq);
1794 }
1795
1796 int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
1797                      struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool,
1798                      struct mlx5e_xdpsq *sq, bool is_redirect)
1799 {
1800         struct mlx5e_create_sq_param csp = {};
1801         int err;
1802
1803         err = mlx5e_alloc_xdpsq(c, params, xsk_pool, param, sq, is_redirect);
1804         if (err)
1805                 return err;
1806
1807         csp.tis_lst_sz      = 1;
1808         csp.tisn            = c->priv->tisn[c->lag_port][0]; /* tc = 0 */
1809         csp.cqn             = sq->cq.mcq.cqn;
1810         csp.wq_ctrl         = &sq->wq_ctrl;
1811         csp.min_inline_mode = sq->min_inline_mode;
1812         set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1813
1814         /* Don't enable multi buffer on XDP_REDIRECT SQ, as it's not yet
1815          * supported by upstream, and there is no defined trigger to allow
1816          * transmitting redirected multi-buffer frames.
1817          */
1818         if (param->is_xdp_mb && !is_redirect)
1819                 set_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state);
1820
1821         err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn);
1822         if (err)
1823                 goto err_free_xdpsq;
1824
1825         mlx5e_set_xmit_fp(sq, param->is_mpw);
1826
1827         if (!param->is_mpw && !test_bit(MLX5E_SQ_STATE_XDP_MULTIBUF, &sq->state)) {
1828                 unsigned int ds_cnt = MLX5E_TX_WQE_EMPTY_DS_COUNT + 1;
1829                 unsigned int inline_hdr_sz = 0;
1830                 int i;
1831
1832                 if (sq->min_inline_mode != MLX5_INLINE_MODE_NONE) {
1833                         inline_hdr_sz = MLX5E_XDP_MIN_INLINE;
1834                         ds_cnt++;
1835                 }
1836
1837                 /* Pre-initialize the WQE fields that never change here (DS
                      * count, inline header size, data segment lkey); the
                      * datapath only fills in the per-packet fields.
                      */
1838                 for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
1839                         struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
1840                         struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
1841                         struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
1842                         struct mlx5_wqe_data_seg *dseg;
1843
1844                         sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
1845                                 .num_wqebbs = 1,
1846                                 .num_pkts   = 1,
1847                         };
1848
1849                         cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
1850                         eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
1851
1852                         dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
1853                         dseg->lkey = sq->mkey_be;
1854                 }
1855         }
1856
1857         return 0;
1858
1859 err_free_xdpsq:
1860         clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1861         mlx5e_free_xdpsq(sq);
1862
1863         return err;
1864 }
1865
1866 void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq)
1867 {
1868         struct mlx5e_channel *c = sq->channel;
1869
1870         clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
1871         synchronize_net(); /* Sync with NAPI. */
1872
1873         mlx5e_destroy_sq(c->mdev, sq->sqn);
1874         mlx5e_free_xdpsq_descs(sq);
1875         mlx5e_free_xdpsq(sq);
1876 }
1877
1878 static int mlx5e_alloc_cq_common(struct mlx5e_priv *priv,
1879                                  struct mlx5e_cq_param *param,
1880                                  struct mlx5e_cq *cq)
1881 {
1882         struct mlx5_core_dev *mdev = priv->mdev;
1883         struct mlx5_core_cq *mcq = &cq->mcq;
1884         int err;
1885         u32 i;
1886
1887         err = mlx5_cqwq_create(mdev, &param->wq, param->cqc, &cq->wq,
1888                                &cq->wq_ctrl);
1889         if (err)
1890                 return err;
1891
1892         mcq->cqe_sz     = 64;
1893         mcq->set_ci_db  = cq->wq_ctrl.db.db;
1894         mcq->arm_db     = cq->wq_ctrl.db.db + 1;
1895         *mcq->set_ci_db = 0;
1896         *mcq->arm_db    = 0;
1897         mcq->vector     = param->eq_ix;
1898         mcq->comp       = mlx5e_completion_event;
1899         mcq->event      = mlx5e_cq_error_event;
1900
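             /* Initialize every CQE to an invalid opcode with the ownership bit
              * set, so the first polling pass treats them as HW-owned instead
              * of interpreting stale memory as completed work.
              */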
1901         for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1902                 struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1903
1904                 cqe->op_own = 0xf1;
1905                 cqe->validity_iteration_count = 0xff;
1906         }
1907
1908         cq->mdev = mdev;
1909         cq->netdev = priv->netdev;
1910         cq->priv = priv;
1911
1912         return 0;
1913 }
1914
1915 static int mlx5e_alloc_cq(struct mlx5e_priv *priv,
1916                           struct mlx5e_cq_param *param,
1917                           struct mlx5e_create_cq_param *ccp,
1918                           struct mlx5e_cq *cq)
1919 {
1920         int err;
1921
1922         param->wq.buf_numa_node = ccp->node;
1923         param->wq.db_numa_node  = ccp->node;
1924         param->eq_ix            = ccp->ix;
1925
1926         err = mlx5e_alloc_cq_common(priv, param, cq);
1927
1928         cq->napi     = ccp->napi;
1929         cq->ch_stats = ccp->ch_stats;
1930
1931         return err;
1932 }
1933
1934 static void mlx5e_free_cq(struct mlx5e_cq *cq)
1935 {
1936         mlx5_wq_destroy(&cq->wq_ctrl);
1937 }
1938
1939 static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
1940 {
1941         u32 out[MLX5_ST_SZ_DW(create_cq_out)];
1942         struct mlx5_core_dev *mdev = cq->mdev;
1943         struct mlx5_core_cq *mcq = &cq->mcq;
1944
1945         void *in;
1946         void *cqc;
1947         int inlen;
1948         int eqn;
1949         int err;
1950
1951         err = mlx5_vector2eqn(mdev, param->eq_ix, &eqn);
1952         if (err)
1953                 return err;
1954
1955         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1956                 sizeof(u64) * cq->wq_ctrl.buf.npages;
1957         in = kvzalloc(inlen, GFP_KERNEL);
1958         if (!in)
1959                 return -ENOMEM;
1960
1961         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1962
1963         memcpy(cqc, param->cqc, sizeof(param->cqc));
1964
1965         mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
1966                                   (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
1967
1968         MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
1969         MLX5_SET(cqc,   cqc, c_eqn_or_apu_element, eqn);
1970         MLX5_SET(cqc,   cqc, uar_page,      mdev->priv.uar->index);
1971         MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1972                                             MLX5_ADAPTER_PAGE_SHIFT);
1973         MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
1974
1975         err = mlx5_core_create_cq(mdev, mcq, in, inlen, out, sizeof(out));
1976
1977         kvfree(in);
1978
1979         if (err)
1980                 return err;
1981
1982         mlx5e_cq_arm(cq);
1983
1984         return 0;
1985 }
1986
1987 static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
1988 {
1989         mlx5_core_destroy_cq(cq->mdev, &cq->mcq);
1990 }
1991
1992 int mlx5e_open_cq(struct mlx5e_priv *priv, struct dim_cq_moder moder,
1993                   struct mlx5e_cq_param *param, struct mlx5e_create_cq_param *ccp,
1994                   struct mlx5e_cq *cq)
1995 {
1996         struct mlx5_core_dev *mdev = priv->mdev;
1997         int err;
1998
1999         err = mlx5e_alloc_cq(priv, param, ccp, cq);
2000         if (err)
2001                 return err;
2002
2003         err = mlx5e_create_cq(cq, param);
2004         if (err)
2005                 goto err_free_cq;
2006
2007         if (MLX5_CAP_GEN(mdev, cq_moderation))
2008                 mlx5_core_modify_cq_moderation(mdev, &cq->mcq, moder.usec, moder.pkts);
2009         return 0;
2010
2011 err_free_cq:
2012         mlx5e_free_cq(cq);
2013
2014         return err;
2015 }
2016
2017 void mlx5e_close_cq(struct mlx5e_cq *cq)
2018 {
2019         mlx5e_destroy_cq(cq);
2020         mlx5e_free_cq(cq);
2021 }
2022
2023 static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
2024                              struct mlx5e_params *params,
2025                              struct mlx5e_create_cq_param *ccp,
2026                              struct mlx5e_channel_param *cparam)
2027 {
2028         int err;
2029         int tc;
2030
2031         for (tc = 0; tc < c->num_tc; tc++) {
2032                 err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->txq_sq.cqp,
2033                                     ccp, &c->sq[tc].cq);
2034                 if (err)
2035                         goto err_close_tx_cqs;
2036         }
2037
2038         return 0;
2039
2040 err_close_tx_cqs:
2041         for (tc--; tc >= 0; tc--)
2042                 mlx5e_close_cq(&c->sq[tc].cq);
2043
2044         return err;
2045 }
2046
2047 static void mlx5e_close_tx_cqs(struct mlx5e_channel *c)
2048 {
2049         int tc;
2050
2051         for (tc = 0; tc < c->num_tc; tc++)
2052                 mlx5e_close_cq(&c->sq[tc].cq);
2053 }
2054
2055 static int mlx5e_mqprio_txq_to_tc(struct netdev_tc_txq *tc_to_txq, unsigned int txq)
2056 {
2057         int tc;
2058
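             /* One unsigned comparison implements "offset <= txq < offset + count":
              * if txq < offset, the subtraction wraps around and the "< count"
              * test fails.
              */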
2059         for (tc = 0; tc < TC_MAX_QUEUE; tc++)
2060                 if (txq - tc_to_txq[tc].offset < tc_to_txq[tc].count)
2061                         return tc;
2062
2063         WARN(1, "Unexpected TCs configuration. No match found for txq %u", txq);
2064         return -ENOENT;
2065 }
2066
2067 static int mlx5e_txq_get_qos_node_hw_id(struct mlx5e_params *params, int txq_ix,
2068                                         u32 *hw_id)
2069 {
2070         int tc;
2071
2072         if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL) {
2073                 *hw_id = 0;
2074                 return 0;
2075         }
2076
2077         tc = mlx5e_mqprio_txq_to_tc(params->mqprio.tc_to_txq, txq_ix);
2078         if (tc < 0)
2079                 return tc;
2080
2081         if (tc >= params->mqprio.num_tc) {
2082                 WARN(1, "Unexpected TCs configuration. tc %d is out of range of %u",
2083                      tc, params->mqprio.num_tc);
2084                 return -EINVAL;
2085         }
2086
2087         *hw_id = params->mqprio.channel.hw_id[tc];
2088         return 0;
2089 }
2090
2091 static int mlx5e_open_sqs(struct mlx5e_channel *c,
2092                           struct mlx5e_params *params,
2093                           struct mlx5e_channel_param *cparam)
2094 {
2095         int err, tc;
2096
2097         for (tc = 0; tc < mlx5e_get_dcb_num_tc(params); tc++) {
2098                 int txq_ix = c->ix + tc * params->num_channels;
2099                 u32 qos_queue_group_id;
2100
2101                 err = mlx5e_txq_get_qos_node_hw_id(params, txq_ix, &qos_queue_group_id);
2102                 if (err)
2103                         goto err_close_sqs;
2104
2105                 err = mlx5e_open_txqsq(c, c->priv->tisn[c->lag_port][tc], txq_ix,
2106                                        params, &cparam->txq_sq, &c->sq[tc], tc,
2107                                        qos_queue_group_id,
2108                                        &c->priv->channel_stats[c->ix]->sq[tc]);
2109                 if (err)
2110                         goto err_close_sqs;
2111         }
2112
2113         return 0;
2114
2115 err_close_sqs:
2116         for (tc--; tc >= 0; tc--)
2117                 mlx5e_close_txqsq(&c->sq[tc]);
2118
2119         return err;
2120 }
2121
2122 static void mlx5e_close_sqs(struct mlx5e_channel *c)
2123 {
2124         int tc;
2125
2126         for (tc = 0; tc < c->num_tc; tc++)
2127                 mlx5e_close_txqsq(&c->sq[tc]);
2128 }
2129
2130 static int mlx5e_set_sq_maxrate(struct net_device *dev,
2131                                 struct mlx5e_txqsq *sq, u32 rate)
2132 {
2133         struct mlx5e_priv *priv = netdev_priv(dev);
2134         struct mlx5_core_dev *mdev = priv->mdev;
2135         struct mlx5e_modify_sq_param msp = {0};
2136         struct mlx5_rate_limit rl = {0};
2137         u16 rl_index = 0;
2138         int err;
2139
2140         if (rate == sq->rate_limit)
2141                 /* nothing to do */
2142                 return 0;
2143
2144         if (sq->rate_limit) {
2145                 rl.rate = sq->rate_limit;
2146                 /* remove current rl index to free space to next ones */
2147                 mlx5_rl_remove_rate(mdev, &rl);
2148                 /* remove the current rate limit index to free room for the next ones */
2149
2150         sq->rate_limit = 0;
2151
2152         if (rate) {
2153                 rl.rate = rate;
2154                 err = mlx5_rl_add_rate(mdev, &rl_index, &rl);
2155                 if (err) {
2156                         netdev_err(dev, "Failed configuring rate %u: %d\n",
2157                                    rate, err);
2158                         return err;
2159                 }
2160         }
2161
2162         msp.curr_state = MLX5_SQC_STATE_RDY;
2163         msp.next_state = MLX5_SQC_STATE_RDY;
2164         msp.rl_index   = rl_index;
2165         msp.rl_update  = true;
2166         err = mlx5e_modify_sq(mdev, sq->sqn, &msp);
2167         if (err) {
2168                 netdev_err(dev, "Failed configuring rate %u: %d\n",
2169                            rate, err);
2170                 /* remove the rate from the table */
2171                 if (rate)
2172                         mlx5_rl_remove_rate(mdev, &rl);
2173                 return err;
2174         }
2175
2176         sq->rate_limit = rate;
2177         return 0;
2178 }
2179
2180 static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
2181 {
2182         struct mlx5e_priv *priv = netdev_priv(dev);
2183         struct mlx5_core_dev *mdev = priv->mdev;
2184         struct mlx5e_txqsq *sq = priv->txq2sq[index];
2185         int err = 0;
2186
2187         if (!mlx5_rl_is_supported(mdev)) {
2188                 netdev_err(dev, "Rate limiting is not supported on this device\n");
2189                 return -EINVAL;
2190         }
2191
2192         /* rate is given in Mb/sec, HW config is in Kb/sec */
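             /* e.g. a 100 Mb/s request becomes 100 << 10 = 102400 Kb/s; the
              * conversion uses a binary factor of 1024 rather than 1000.
              */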
2193         rate = rate << 10;
2194
2195         /* Check whether the rate is in the valid range; 0 is always valid */
2196         if (rate && !mlx5_rl_is_in_range(mdev, rate)) {
2197                 netdev_err(dev, "TX rate %u is not in range\n", rate);
2198                 return -ERANGE;
2199         }
2200
2201         mutex_lock(&priv->state_lock);
2202         if (test_bit(MLX5E_STATE_OPENED, &priv->state))
2203                 err = mlx5e_set_sq_maxrate(dev, sq, rate);
2204         if (!err)
2205                 priv->tx_rates[index] = rate;
2206         mutex_unlock(&priv->state_lock);
2207
2208         return err;
2209 }
2210
2211 static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
2212                              struct mlx5e_rq_param *rq_params)
2213 {
2214         int err;
2215
2216         err = mlx5e_init_rxq_rq(c, params, &c->rq);
2217         if (err)
2218                 return err;
2219
2220         return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq);
2221 }
2222
2223 static int mlx5e_open_queues(struct mlx5e_channel *c,
2224                              struct mlx5e_params *params,
2225                              struct mlx5e_channel_param *cparam)
2226 {
2227         struct dim_cq_moder icocq_moder = {0, 0};
2228         struct mlx5e_create_cq_param ccp;
2229         int err;
2230
2231         mlx5e_build_create_cq_param(&ccp, c);
2232
2233         err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp,
2234                             &c->async_icosq.cq);
2235         if (err)
2236                 return err;
2237
2238         err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp,
2239                             &c->icosq.cq);
2240         if (err)
2241                 goto err_close_async_icosq_cq;
2242
2243         err = mlx5e_open_tx_cqs(c, params, &ccp, cparam);
2244         if (err)
2245                 goto err_close_icosq_cq;
2246
2247         err = mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp, &ccp,
2248                             &c->xdpsq.cq);
2249         if (err)
2250                 goto err_close_tx_cqs;
2251
2252         err = mlx5e_open_cq(c->priv, params->rx_cq_moderation, &cparam->rq.cqp, &ccp,
2253                             &c->rq.cq);
2254         if (err)
2255                 goto err_close_xdp_tx_cqs;
2256
2257         err = c->xdp ? mlx5e_open_cq(c->priv, params->tx_cq_moderation, &cparam->xdp_sq.cqp,
2258                                      &ccp, &c->rq_xdpsq.cq) : 0;
2259         if (err)
2260                 goto err_close_rx_cq;
2261
2262         spin_lock_init(&c->async_icosq_lock);
2263
2264         err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq,
2265                                mlx5e_async_icosq_err_cqe_work);
2266         if (err)
2267                 goto err_close_xdpsq_cq;
2268
2269         mutex_init(&c->icosq_recovery_lock);
2270
2271         err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq,
2272                                mlx5e_icosq_err_cqe_work);
2273         if (err)
2274                 goto err_close_async_icosq;
2275
2276         err = mlx5e_open_sqs(c, params, cparam);
2277         if (err)
2278                 goto err_close_icosq;
2279
2280         err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
2281         if (err)
2282                 goto err_close_sqs;
2283
2284         if (c->xdp) {
2285                 err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL,
2286                                        &c->rq_xdpsq, false);
2287                 if (err)
2288                         goto err_close_rq;
2289         }
2290
2291         err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, NULL, &c->xdpsq, true);
2292         if (err)
2293                 goto err_close_xdp_sq;
2294
2295         return 0;
2296
2297 err_close_xdp_sq:
2298         if (c->xdp)
2299                 mlx5e_close_xdpsq(&c->rq_xdpsq);
2300
2301 err_close_rq:
2302         mlx5e_close_rq(&c->rq);
2303
2304 err_close_sqs:
2305         mlx5e_close_sqs(c);
2306
2307 err_close_icosq:
2308         mlx5e_close_icosq(&c->icosq);
2309
2310 err_close_async_icosq:
2311         mlx5e_close_icosq(&c->async_icosq);
2312
2313 err_close_xdpsq_cq:
2314         if (c->xdp)
2315                 mlx5e_close_cq(&c->rq_xdpsq.cq);
2316
2317 err_close_rx_cq:
2318         mlx5e_close_cq(&c->rq.cq);
2319
2320 err_close_xdp_tx_cqs:
2321         mlx5e_close_cq(&c->xdpsq.cq);
2322
2323 err_close_tx_cqs:
2324         mlx5e_close_tx_cqs(c);
2325
2326 err_close_icosq_cq:
2327         mlx5e_close_cq(&c->icosq.cq);
2328
2329 err_close_async_icosq_cq:
2330         mlx5e_close_cq(&c->async_icosq.cq);
2331
2332         return err;
2333 }
2334
2335 static void mlx5e_close_queues(struct mlx5e_channel *c)
2336 {
2337         mlx5e_close_xdpsq(&c->xdpsq);
2338         if (c->xdp)
2339                 mlx5e_close_xdpsq(&c->rq_xdpsq);
2340         /* The same ICOSQ is used for UMRs for both RQ and XSKRQ. */
2341         cancel_work_sync(&c->icosq.recover_work);
2342         mlx5e_close_rq(&c->rq);
2343         mlx5e_close_sqs(c);
2344         mlx5e_close_icosq(&c->icosq);
2345         mutex_destroy(&c->icosq_recovery_lock);
2346         mlx5e_close_icosq(&c->async_icosq);
2347         if (c->xdp)
2348                 mlx5e_close_cq(&c->rq_xdpsq.cq);
2349         mlx5e_close_cq(&c->rq.cq);
2350         mlx5e_close_cq(&c->xdpsq.cq);
2351         mlx5e_close_tx_cqs(c);
2352         mlx5e_close_cq(&c->icosq.cq);
2353         mlx5e_close_cq(&c->async_icosq.cq);
2354 }
2355
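     /* Channels are spread across the LAG ports round-robin.  Non-PF functions
      * bias the starting port by their vhca_id, which looks intended to spread
      * traffic from different functions across ports as well.
      */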
2356 static u8 mlx5e_enumerate_lag_port(struct mlx5_core_dev *mdev, int ix)
2357 {
2358         u16 port_aff_bias = mlx5_core_is_pf(mdev) ? 0 : MLX5_CAP_GEN(mdev, vhca_id);
2359
2360         return (ix + port_aff_bias) % mlx5e_get_num_lag_ports(mdev);
2361 }
2362
2363 static int mlx5e_channel_stats_alloc(struct mlx5e_priv *priv, int ix, int cpu)
2364 {
2365         if (ix > priv->stats_nch)  {
2366                 netdev_warn(priv->netdev, "Unexpected channel stats index %d > %d\n", ix,
2367                             priv->stats_nch);
2368                 return -EINVAL;
2369         }
2370
2371         if (priv->channel_stats[ix])
2372                 return 0;
2373
2374         /* Asymmetric dynamic memory allocation.
2375          * Freed in mlx5e_priv_arrays_free, not on channel closure.
2376          */
2377         mlx5e_dbg(DRV, priv, "Creating channel stats %d\n", ix);
2378         priv->channel_stats[ix] = kvzalloc_node(sizeof(**priv->channel_stats),
2379                                                 GFP_KERNEL, cpu_to_node(cpu));
2380         if (!priv->channel_stats[ix])
2381                 return -ENOMEM;
2382         priv->stats_nch++;
2383
2384         return 0;
2385 }
2386
2387 void mlx5e_trigger_napi_icosq(struct mlx5e_channel *c)
2388 {
2389         spin_lock_bh(&c->async_icosq_lock);
2390         mlx5e_trigger_irq(&c->async_icosq);
2391         spin_unlock_bh(&c->async_icosq_lock);
2392 }
2393
2394 void mlx5e_trigger_napi_sched(struct napi_struct *napi)
2395 {
2396         local_bh_disable();
2397         napi_schedule(napi);
2398         local_bh_enable();
2399 }
2400
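     /* The channel structure (and its rings) is allocated on the NUMA node of
      * the first CPU in its completion vector's affinity mask, keeping the
      * per-channel memory local to the CPU that services the interrupt.
      */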
2401 static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
2402                               struct mlx5e_params *params,
2403                               struct mlx5e_channel_param *cparam,
2404                               struct xsk_buff_pool *xsk_pool,
2405                               struct mlx5e_channel **cp)
2406 {
2407         int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));
2408         struct net_device *netdev = priv->netdev;
2409         struct mlx5e_xsk_param xsk;
2410         struct mlx5e_channel *c;
2411         unsigned int irq;
2412         int err;
2413
2414         err = mlx5_vector2irqn(priv->mdev, ix, &irq);
2415         if (err)
2416                 return err;
2417
2418         err = mlx5e_channel_stats_alloc(priv, ix, cpu);
2419         if (err)
2420                 return err;
2421
2422         c = kvzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
2423         if (!c)
2424                 return -ENOMEM;
2425
2426         c->priv     = priv;
2427         c->mdev     = priv->mdev;
2428         c->tstamp   = &priv->tstamp;
2429         c->ix       = ix;
2430         c->cpu      = cpu;
2431         c->pdev     = mlx5_core_dma_dev(priv->mdev);
2432         c->netdev   = priv->netdev;
2433         c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey);
2434         c->num_tc   = mlx5e_get_dcb_num_tc(params);
2435         c->xdp      = !!params->xdp_prog;
2436         c->stats    = &priv->channel_stats[ix]->ch;
2437         c->aff_mask = irq_get_effective_affinity_mask(irq);
2438         c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
2439
2440         netif_napi_add(netdev, &c->napi, mlx5e_napi_poll);
2441
2442         err = mlx5e_open_queues(c, params, cparam);
2443         if (unlikely(err))
2444                 goto err_napi_del;
2445
2446         if (xsk_pool) {
2447                 mlx5e_build_xsk_param(xsk_pool, &xsk);
2448                 err = mlx5e_open_xsk(priv, params, &xsk, xsk_pool, c);
2449                 if (unlikely(err))
2450                         goto err_close_queues;
2451         }
2452
2453         *cp = c;
2454
2455         return 0;
2456
2457 err_close_queues:
2458         mlx5e_close_queues(c);
2459
2460 err_napi_del:
2461         netif_napi_del(&c->napi);
2462
2463         kvfree(c);
2464
2465         return err;
2466 }
2467
2468 static void mlx5e_activate_channel(struct mlx5e_channel *c)
2469 {
2470         int tc;
2471
2472         napi_enable(&c->napi);
2473
2474         for (tc = 0; tc < c->num_tc; tc++)
2475                 mlx5e_activate_txqsq(&c->sq[tc]);
2476         mlx5e_activate_icosq(&c->icosq);
2477         mlx5e_activate_icosq(&c->async_icosq);
2478
2479         if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
2480                 mlx5e_activate_xsk(c);
2481         else
2482                 mlx5e_activate_rq(&c->rq);
2483 }
2484
2485 static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
2486 {
2487         int tc;
2488
2489         if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
2490                 mlx5e_deactivate_xsk(c);
2491         else
2492                 mlx5e_deactivate_rq(&c->rq);
2493
2494         mlx5e_deactivate_icosq(&c->async_icosq);
2495         mlx5e_deactivate_icosq(&c->icosq);
2496         for (tc = 0; tc < c->num_tc; tc++)
2497                 mlx5e_deactivate_txqsq(&c->sq[tc]);
2498         mlx5e_qos_deactivate_queues(c);
2499
2500         napi_disable(&c->napi);
2501 }
2502
2503 static void mlx5e_close_channel(struct mlx5e_channel *c)
2504 {
2505         if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
2506                 mlx5e_close_xsk(c);
2507         mlx5e_close_queues(c);
2508         mlx5e_qos_close_queues(c);
2509         netif_napi_del(&c->napi);
2510
2511         kvfree(c);
2512 }
2513
2514 int mlx5e_open_channels(struct mlx5e_priv *priv,
2515                         struct mlx5e_channels *chs)
2516 {
2517         struct mlx5e_channel_param *cparam;
2518         int err = -ENOMEM;
2519         int i;
2520
2521         chs->num = chs->params.num_channels;
2522
2523         chs->c = kcalloc(chs->num, sizeof(struct mlx5e_channel *), GFP_KERNEL);
2524         cparam = kvzalloc(sizeof(struct mlx5e_channel_param), GFP_KERNEL);
2525         if (!chs->c || !cparam)
2526                 goto err_free;
2527
2528         err = mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
2529         if (err)
2530                 goto err_free;
2531
2532         for (i = 0; i < chs->num; i++) {
2533                 struct xsk_buff_pool *xsk_pool = NULL;
2534
2535                 if (chs->params.xdp_prog)
2536                         xsk_pool = mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, i);
2537
2538                 err = mlx5e_open_channel(priv, i, &chs->params, cparam, xsk_pool, &chs->c[i]);
2539                 if (err)
2540                         goto err_close_channels;
2541         }
2542
2543         if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) {
2544                 err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
2545                 if (err)
2546                         goto err_close_channels;
2547         }
2548
2549         if (priv->htb) {
2550                 err = mlx5e_qos_open_queues(priv, chs);
2551                 if (err)
2552                         goto err_close_ptp;
2553         }
2554
2555         mlx5e_health_channels_update(priv);
2556         kvfree(cparam);
2557         return 0;
2558
2559 err_close_ptp:
2560         if (chs->ptp)
2561                 mlx5e_ptp_close(chs->ptp);
2562
2563 err_close_channels:
2564         for (i--; i >= 0; i--)
2565                 mlx5e_close_channel(chs->c[i]);
2566
2567 err_free:
2568         kfree(chs->c);
2569         kvfree(cparam);
2570         chs->num = 0;
2571         return err;
2572 }
2573
2574 static void mlx5e_activate_channels(struct mlx5e_priv *priv, struct mlx5e_channels *chs)
2575 {
2576         int i;
2577
2578         for (i = 0; i < chs->num; i++)
2579                 mlx5e_activate_channel(chs->c[i]);
2580
2581         if (priv->htb)
2582                 mlx5e_qos_activate_queues(priv);
2583
2584         for (i = 0; i < chs->num; i++)
2585                 mlx5e_trigger_napi_icosq(chs->c[i]);
2586
2587         if (chs->ptp)
2588                 mlx5e_ptp_activate_channel(chs->ptp);
2589 }
2590
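     /* Once one RQ has timed out, the remaining RQs are checked with a zero
      * timeout, so the total wait is bounded by roughly one timeout period
      * rather than one per channel.
      */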
2591 static int mlx5e_wait_channels_min_rx_wqes(struct mlx5e_channels *chs)
2592 {
2593         int err = 0;
2594         int i;
2595
2596         for (i = 0; i < chs->num; i++) {
2597                 int timeout = err ? 0 : MLX5E_RQ_WQES_TIMEOUT;
2598                 struct mlx5e_channel *c = chs->c[i];
2599
2600                 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
2601                         continue;
2602
2603                 err |= mlx5e_wait_for_min_rx_wqes(&c->rq, timeout);
2604
2605                 /* Don't wait on the XSK RQ, because the newer xdpsock sample
2606                  * doesn't provide any Fill Ring entries at the setup stage.
2607                  */
2608         }
2609
2610         return err ? -ETIMEDOUT : 0;
2611 }
2612
2613 static void mlx5e_deactivate_channels(struct mlx5e_channels *chs)
2614 {
2615         int i;
2616
2617         if (chs->ptp)
2618                 mlx5e_ptp_deactivate_channel(chs->ptp);
2619
2620         for (i = 0; i < chs->num; i++)
2621                 mlx5e_deactivate_channel(chs->c[i]);
2622 }
2623
2624 void mlx5e_close_channels(struct mlx5e_channels *chs)
2625 {
2626         int i;
2627
2628         if (chs->ptp) {
2629                 mlx5e_ptp_close(chs->ptp);
2630                 chs->ptp = NULL;
2631         }
2632         for (i = 0; i < chs->num; i++)
2633                 mlx5e_close_channel(chs->c[i]);
2634
2635         kfree(chs->c);
2636         chs->num = 0;
2637 }
2638
2639 static int mlx5e_modify_tirs_packet_merge(struct mlx5e_priv *priv)
2640 {
2641         struct mlx5e_rx_res *res = priv->rx_res;
2642
2643         return mlx5e_rx_res_packet_merge_set_param(res, &priv->channels.params.packet_merge);
2644 }
2645
2646 static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_modify_tirs_packet_merge);
2647
2648 static int mlx5e_set_mtu(struct mlx5_core_dev *mdev,
2649                          struct mlx5e_params *params, u16 mtu)
2650 {
2651         u16 hw_mtu = MLX5E_SW2HW_MTU(params, mtu);
2652         int err;
2653
2654         err = mlx5_set_port_mtu(mdev, hw_mtu, 1);
2655         if (err)
2656                 return err;
2657
2658         /* Update vport context MTU */
2659         mlx5_modify_nic_vport_mtu(mdev, hw_mtu);
2660         return 0;
2661 }
2662
2663 static void mlx5e_query_mtu(struct mlx5_core_dev *mdev,
2664                             struct mlx5e_params *params, u16 *mtu)
2665 {
2666         u16 hw_mtu = 0;
2667         int err;
2668
2669         err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
2670         if (err || !hw_mtu) /* fallback to port oper mtu */
2671                 mlx5_query_port_oper_mtu(mdev, &hw_mtu, 1);
2672
2673         *mtu = MLX5E_HW2SW_MTU(params, hw_mtu);
2674 }
2675
2676 int mlx5e_set_dev_port_mtu(struct mlx5e_priv *priv)
2677 {
2678         struct mlx5e_params *params = &priv->channels.params;
2679         struct net_device *netdev = priv->netdev;
2680         struct mlx5_core_dev *mdev = priv->mdev;
2681         u16 mtu;
2682         int err;
2683
2684         err = mlx5e_set_mtu(mdev, params, params->sw_mtu);
2685         if (err)
2686                 return err;
2687
2688         mlx5e_query_mtu(mdev, params, &mtu);
2689         if (mtu != params->sw_mtu)
2690                 netdev_warn(netdev, "%s: VPort MTU %d is different from netdev mtu %d\n",
2691                             __func__, mtu, params->sw_mtu);
2692
2693         params->sw_mtu = mtu;
2694         return 0;
2695 }
2696
2697 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_set_dev_port_mtu);
2698
2699 void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
2700 {
2701         struct mlx5e_params *params = &priv->channels.params;
2702         struct net_device *netdev   = priv->netdev;
2703         struct mlx5_core_dev *mdev  = priv->mdev;
2704         u16 max_mtu;
2705
2706         /* MTU range: 68 - hw-specific max */
2707         netdev->min_mtu = ETH_MIN_MTU;
2708
2709         mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
2710         netdev->max_mtu = min_t(unsigned int, MLX5E_HW2SW_MTU(params, max_mtu),
2711                                 ETH_MAX_MTU);
2712 }
2713
2714 static int mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc,
2715                                 struct netdev_tc_txq *tc_to_txq)
2716 {
2717         int tc, err;
2718
2719         netdev_reset_tc(netdev);
2720
2721         if (ntc == 1)
2722                 return 0;
2723
2724         err = netdev_set_num_tc(netdev, ntc);
2725         if (err) {
2726                 netdev_WARN(netdev, "netdev_set_num_tc failed (%d), ntc = %d\n", err, ntc);
2727                 return err;
2728         }
2729
2730         for (tc = 0; tc < ntc; tc++) {
2731                 u16 count, offset;
2732
2733                 count = tc_to_txq[tc].count;
2734                 offset = tc_to_txq[tc].offset;
2735                 netdev_set_tc_queue(netdev, tc, count, offset);
2736         }
2737
2738         return 0;
2739 }
2740
2741 int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv)
2742 {
2743         int nch, ntc, num_txqs, err;
2744         int qos_queues = 0;
2745
2746         if (priv->htb)
2747                 qos_queues = mlx5e_htb_cur_leaf_nodes(priv->htb);
2748
2749         nch = priv->channels.params.num_channels;
2750         ntc = mlx5e_get_dcb_num_tc(&priv->channels.params);
2751         num_txqs = nch * ntc + qos_queues;
2752         if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_TX_PORT_TS))
2753                 num_txqs += ntc;
2754
2755         mlx5e_dbg(DRV, priv, "Setting num_txqs %d\n", num_txqs);
2756         err = netif_set_real_num_tx_queues(priv->netdev, num_txqs);
2757         if (err)
2758                 netdev_warn(priv->netdev, "netif_set_real_num_tx_queues failed, %d\n", err);
2759
2760         return err;
2761 }
2762
2763 static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
2764 {
2765         struct netdev_tc_txq old_tc_to_txq[TC_MAX_QUEUE], *tc_to_txq;
2766         struct net_device *netdev = priv->netdev;
2767         int old_num_txqs, old_ntc;
2768         int nch, ntc;
2769         int err;
2770         int i;
2771
2772         old_num_txqs = netdev->real_num_tx_queues;
2773         old_ntc = netdev->num_tc ? : 1;
2774         for (i = 0; i < ARRAY_SIZE(old_tc_to_txq); i++)
2775                 old_tc_to_txq[i] = netdev->tc_to_txq[i];
2776
2777         nch = priv->channels.params.num_channels;
2778         ntc = priv->channels.params.mqprio.num_tc;
2779         tc_to_txq = priv->channels.params.mqprio.tc_to_txq;
2780
2781         err = mlx5e_netdev_set_tcs(netdev, nch, ntc, tc_to_txq);
2782         if (err)
2783                 goto err_out;
2784         err = mlx5e_update_tx_netdev_queues(priv);
2785         if (err)
2786                 goto err_tcs;
2787         err = netif_set_real_num_rx_queues(netdev, nch);
2788         if (err) {
2789                 netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
2790                 goto err_txqs;
2791         }
2792
2793         return 0;
2794
2795 err_txqs:
2796         /* netif_set_real_num_rx_queues could fail only when nch increased. Only
2797          * one of nch and ntc is changed in this function. That means the call
2798          * to netif_set_real_num_tx_queues below should not fail, because it
2799          * decreases the number of TX queues.
2800          */
2801         WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs));
2802
2803 err_tcs:
2804         WARN_ON_ONCE(mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc,
2805                                           old_tc_to_txq));
2806 err_out:
2807         return err;
2808 }
2809
2810 static MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_update_netdev_queues);
2811
2812 static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
2813                                            struct mlx5e_params *params)
2814 {
2815         struct mlx5_core_dev *mdev = priv->mdev;
2816         int num_comp_vectors, ix, irq;
2817
2818         num_comp_vectors = mlx5_comp_vectors_count(mdev);
2819
2820         for (ix = 0; ix < params->num_channels; ix++) {
2821                 cpumask_clear(priv->scratchpad.cpumask);
2822
2823                 for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) {
2824                         int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(mdev, irq));
2825
2826                         cpumask_set_cpu(cpu, priv->scratchpad.cpumask);
2827                 }
2828
2829                 netif_set_xps_queue(priv->netdev, priv->scratchpad.cpumask, ix);
2830         }
2831 }
2832
2833 static int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
2834 {
2835         u16 count = priv->channels.params.num_channels;
2836         int err;
2837
2838         err = mlx5e_update_netdev_queues(priv);
2839         if (err)
2840                 return err;
2841
2842         mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
2843
2844         /* This function may be called on attach, before priv->rx_res is created. */
2845         if (!netif_is_rxfh_configured(priv->netdev) && priv->rx_res)
2846                 mlx5e_rx_res_rss_set_indir_uniform(priv->rx_res, count);
2847
2848         return 0;
2849 }
2850
2851 MLX5E_DEFINE_PREACTIVATE_WRAPPER_CTX(mlx5e_num_channels_changed);
2852
2853 static void mlx5e_build_txq_maps(struct mlx5e_priv *priv)
2854 {
2855         int i, ch, tc, num_tc;
2856
2857         ch = priv->channels.num;
2858         num_tc = mlx5e_get_dcb_num_tc(&priv->channels.params);
2859
2860         for (i = 0; i < ch; i++) {
2861                 for (tc = 0; tc < num_tc; tc++) {
2862                         struct mlx5e_channel *c = priv->channels.c[i];
2863                         struct mlx5e_txqsq *sq = &c->sq[tc];
2864
2865                         priv->txq2sq[sq->txq_ix] = sq;
2866                 }
2867         }
2868
2869         if (!priv->channels.ptp)
2870                 goto out;
2871
2872         if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
2873                 goto out;
2874
2875         for (tc = 0; tc < num_tc; tc++) {
2876                 struct mlx5e_ptp *c = priv->channels.ptp;
2877                 struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
2878
2879                 priv->txq2sq[sq->txq_ix] = sq;
2880         }
2881
2882 out:
2883         /* Make the change to txq2sq visible before the queue is started.
2884          * As mlx5e_xmit runs under a spinlock, there is an implicit ACQUIRE,
2885          * which pairs with this barrier.
2886          */
2887         smp_wmb();
2888 }
2889
2890 void mlx5e_activate_priv_channels(struct mlx5e_priv *priv)
2891 {
2892         mlx5e_build_txq_maps(priv);
2893         mlx5e_activate_channels(priv, &priv->channels);
2894         mlx5e_xdp_tx_enable(priv);
2895
2896         /* dev_watchdog() wants all TX queues to be started when the carrier is
2897          * OK, including the ones in range real_num_tx_queues..num_tx_queues-1.
2898          * Make it happy to avoid TX timeout false alarms.
2899          */
2900         netif_tx_start_all_queues(priv->netdev);
2901
2902         if (mlx5e_is_vport_rep(priv))
2903                 mlx5e_rep_activate_channels(priv);
2904
2905         mlx5e_wait_channels_min_rx_wqes(&priv->channels);
2906
2907         if (priv->rx_res)
2908                 mlx5e_rx_res_channels_activate(priv->rx_res, &priv->channels);
2909 }
2910
2911 void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv)
2912 {
2913         if (priv->rx_res)
2914                 mlx5e_rx_res_channels_deactivate(priv->rx_res);
2915
2916         if (mlx5e_is_vport_rep(priv))
2917                 mlx5e_rep_deactivate_channels(priv);
2918
2919         /* The results of ndo_select_queue are unreliable while the netdev config
2920          * is being changed (real_num_tx_queues, num_tc). Stop all queues to
2921          * prevent ndo_start_xmit from being called, so that it can assume that
2922          * the selected queue is always valid.
2923          */
2924         netif_tx_disable(priv->netdev);
2925
2926         mlx5e_xdp_tx_disable(priv);
2927         mlx5e_deactivate_channels(&priv->channels);
2928 }
2929
2930 static int mlx5e_switch_priv_params(struct mlx5e_priv *priv,
2931                                     struct mlx5e_params *new_params,
2932                                     mlx5e_fp_preactivate preactivate,
2933                                     void *context)
2934 {
2935         struct mlx5e_params old_params;
2936
2937         old_params = priv->channels.params;
2938         priv->channels.params = *new_params;
2939
2940         if (preactivate) {
2941                 int err;
2942
2943                 err = preactivate(priv, context);
2944                 if (err) {
2945                         priv->channels.params = old_params;
2946                         return err;
2947                 }
2948         }
2949
2950         return 0;
2951 }
2952
2953 static int mlx5e_switch_priv_channels(struct mlx5e_priv *priv,
2954                                       struct mlx5e_channels *new_chs,
2955                                       mlx5e_fp_preactivate preactivate,
2956                                       void *context)
2957 {
2958         struct net_device *netdev = priv->netdev;
2959         struct mlx5e_channels old_chs;
2960         int carrier_ok;
2961         int err = 0;
2962
2963         carrier_ok = netif_carrier_ok(netdev);
2964         netif_carrier_off(netdev);
2965
2966         mlx5e_deactivate_priv_channels(priv);
2967
2968         old_chs = priv->channels;
2969         priv->channels = *new_chs;
2970
2971         /* New channels are ready to roll, call the preactivate hook if needed
2972          * to modify HW settings or update kernel parameters.
2973          */
2974         if (preactivate) {
2975                 err = preactivate(priv, context);
2976                 if (err) {
2977                         priv->channels = old_chs;
2978                         goto out;
2979                 }
2980         }
2981
2982         mlx5e_close_channels(&old_chs);
2983         priv->profile->update_rx(priv);
2984
2985         mlx5e_selq_apply(&priv->selq);
2986 out:
2987         mlx5e_activate_priv_channels(priv);
2988
2989         /* restore the carrier state if needed */
2990         if (carrier_ok)
2991                 netif_carrier_on(netdev);
2992
2993         return err;
2994 }
2995
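     /* Two paths: if the netdev is not opened (or no reset was requested), only
      * the parameters are swapped and the preactivate hook runs.  Otherwise a
      * complete set of new channels is opened first, traffic is moved over in
      * mlx5e_switch_priv_channels(), and the old channels are closed last.
      */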
2996 int mlx5e_safe_switch_params(struct mlx5e_priv *priv,
2997                              struct mlx5e_params *params,
2998                              mlx5e_fp_preactivate preactivate,
2999                              void *context, bool reset)
3000 {
3001         struct mlx5e_channels *new_chs;
3002         int err;
3003
3004         reset &= test_bit(MLX5E_STATE_OPENED, &priv->state);
3005         if (!reset)
3006                 return mlx5e_switch_priv_params(priv, params, preactivate, context);
3007
3008         new_chs = kzalloc(sizeof(*new_chs), GFP_KERNEL);
3009         if (!new_chs)
3010                 return -ENOMEM;
3011         new_chs->params = *params;
3012
3013         mlx5e_selq_prepare_params(&priv->selq, &new_chs->params);
3014
3015         err = mlx5e_open_channels(priv, new_chs);
3016         if (err)
3017                 goto err_cancel_selq;
3018
3019         err = mlx5e_switch_priv_channels(priv, new_chs, preactivate, context);
3020         if (err)
3021                 goto err_close;
3022
3023         kfree(new_chs);
3024         return 0;
3025
3026 err_close:
3027         mlx5e_close_channels(new_chs);
3028
3029 err_cancel_selq:
3030         mlx5e_selq_cancel(&priv->selq);
3031         kfree(new_chs);
3032         return err;
3033 }
3034
3035 int mlx5e_safe_reopen_channels(struct mlx5e_priv *priv)
3036 {
3037         return mlx5e_safe_switch_params(priv, &priv->channels.params, NULL, NULL, true);
3038 }
3039
3040 void mlx5e_timestamp_init(struct mlx5e_priv *priv)
3041 {
3042         priv->tstamp.tx_type   = HWTSTAMP_TX_OFF;
3043         priv->tstamp.rx_filter = HWTSTAMP_FILTER_NONE;
3044 }
3045
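     /* Besides the physical port state, the uplink vport admin state is made to
      * follow the port when the device supports uplink_follow and is not in
      * switchdev (offloads) mode: AUTO on UP, DOWN otherwise.
      */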
3046 static void mlx5e_modify_admin_state(struct mlx5_core_dev *mdev,
3047                                      enum mlx5_port_status state)
3048 {
3049         struct mlx5_eswitch *esw = mdev->priv.eswitch;
3050         int vport_admin_state;
3051
3052         mlx5_set_port_admin_status(mdev, state);
3053
3054         if (mlx5_eswitch_mode(mdev) == MLX5_ESWITCH_OFFLOADS ||
3055             !MLX5_CAP_GEN(mdev, uplink_follow))
3056                 return;
3057
3058         if (state == MLX5_PORT_UP)
3059                 vport_admin_state = MLX5_VPORT_ADMIN_STATE_AUTO;
3060         else
3061                 vport_admin_state = MLX5_VPORT_ADMIN_STATE_DOWN;
3062
3063         mlx5_eswitch_set_vport_state(esw, MLX5_VPORT_UPLINK, vport_admin_state);
3064 }
3065
3066 int mlx5e_open_locked(struct net_device *netdev)
3067 {
3068         struct mlx5e_priv *priv = netdev_priv(netdev);
3069         int err;
3070
3071         mlx5e_selq_prepare_params(&priv->selq, &priv->channels.params);
3072
3073         set_bit(MLX5E_STATE_OPENED, &priv->state);
3074
3075         err = mlx5e_open_channels(priv, &priv->channels);
3076         if (err)
3077                 goto err_clear_state_opened_flag;
3078
3079         err = priv->profile->update_rx(priv);
3080         if (err)
3081                 goto err_close_channels;
3082
3083         mlx5e_selq_apply(&priv->selq);
3084         mlx5e_activate_priv_channels(priv);
3085         mlx5e_apply_traps(priv, true);
3086         if (priv->profile->update_carrier)
3087                 priv->profile->update_carrier(priv);
3088
3089         mlx5e_queue_update_stats(priv);
3090         return 0;
3091
3092 err_close_channels:
3093         mlx5e_close_channels(&priv->channels);
3094 err_clear_state_opened_flag:
3095         clear_bit(MLX5E_STATE_OPENED, &priv->state);
3096         mlx5e_selq_cancel(&priv->selq);
3097         return err;
3098 }
3099
3100 int mlx5e_open(struct net_device *netdev)
3101 {
3102         struct mlx5e_priv *priv = netdev_priv(netdev);
3103         int err;
3104
3105         mutex_lock(&priv->state_lock);
3106         err = mlx5e_open_locked(netdev);
3107         if (!err)
3108                 mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_UP);
3109         mutex_unlock(&priv->state_lock);
3110
3111         return err;
3112 }
3113
3114 int mlx5e_close_locked(struct net_device *netdev)
3115 {
3116         struct mlx5e_priv *priv = netdev_priv(netdev);
3117
3118         /* May already be CLOSED if a previous configuration operation
3119          * (e.g. an RX/TX queue size change) that involves close&open failed.
3120          */
3121         if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
3122                 return 0;
3123
3124         mlx5e_apply_traps(priv, false);
3125         clear_bit(MLX5E_STATE_OPENED, &priv->state);
3126
3127         netif_carrier_off(priv->netdev);
3128         mlx5e_deactivate_priv_channels(priv);
3129         mlx5e_close_channels(&priv->channels);
3130
3131         return 0;
3132 }
3133
3134 int mlx5e_close(struct net_device *netdev)
3135 {
3136         struct mlx5e_priv *priv = netdev_priv(netdev);
3137         int err;
3138
3139         if (!netif_device_present(netdev))
3140                 return -ENODEV;
3141
3142         mutex_lock(&priv->state_lock);
3143         mlx5e_modify_admin_state(priv->mdev, MLX5_PORT_DOWN);
3144         err = mlx5e_close_locked(netdev);
3145         mutex_unlock(&priv->state_lock);
3146
3147         return err;
3148 }
3149
3150 static void mlx5e_free_drop_rq(struct mlx5e_rq *rq)
3151 {
3152         mlx5_wq_destroy(&rq->wq_ctrl);
3153 }
3154
3155 static int mlx5e_alloc_drop_rq(struct mlx5_core_dev *mdev,
3156                                struct mlx5e_rq *rq,
3157                                struct mlx5e_rq_param *param)
3158 {
3159         void *rqc = param->rqc;
3160         void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
3161         int err;
3162
3163         param->wq.db_numa_node = param->wq.buf_numa_node;
3164
3165         err = mlx5_wq_cyc_create(mdev, &param->wq, rqc_wq, &rq->wqe.wq,
3166                                  &rq->wq_ctrl);
3167         if (err)
3168                 return err;
3169
3170         /* Mark as unused given "Drop-RQ" packets never reach XDP */
3171         xdp_rxq_info_unused(&rq->xdp_rxq);
3172
3173         rq->mdev = mdev;
3174
3175         return 0;
3176 }
3177
3178 static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv,
3179                                struct mlx5e_cq *cq,
3180                                struct mlx5e_cq_param *param)
3181 {
3182         struct mlx5_core_dev *mdev = priv->mdev;
3183
3184         param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
3185         param->wq.db_numa_node  = dev_to_node(mlx5_core_dma_dev(mdev));
3186
3187         return mlx5e_alloc_cq_common(priv, param, cq);
3188 }
3189
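/* Editor's note: the drop RQ below backs the RX steering objects while the
 * real RQs are down; traffic steered to it is dropped by HW and, once the
 * RQ reaches RDY state, accounted via the rx_if_down_packets counter.
 */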
3190 int mlx5e_open_drop_rq(struct mlx5e_priv *priv,
3191                        struct mlx5e_rq *drop_rq)
3192 {
3193         struct mlx5_core_dev *mdev = priv->mdev;
3194         struct mlx5e_cq_param cq_param = {};
3195         struct mlx5e_rq_param rq_param = {};
3196         struct mlx5e_cq *cq = &drop_rq->cq;
3197         int err;
3198
3199         mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
3200
3201         err = mlx5e_alloc_drop_cq(priv, cq, &cq_param);
3202         if (err)
3203                 return err;
3204
3205         err = mlx5e_create_cq(cq, &cq_param);
3206         if (err)
3207                 goto err_free_cq;
3208
3209         err = mlx5e_alloc_drop_rq(mdev, drop_rq, &rq_param);
3210         if (err)
3211                 goto err_destroy_cq;
3212
3213         err = mlx5e_create_rq(drop_rq, &rq_param);
3214         if (err)
3215                 goto err_free_rq;
3216
3217         err = mlx5e_modify_rq_state(drop_rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
3218         if (err)
3219                 mlx5_core_warn(priv->mdev, "modify_rq_state failed, rx_if_down_packets won't be counted %d\n", err);
3220
3221         return 0;
3222
3223 err_free_rq:
3224         mlx5e_free_drop_rq(drop_rq);
3225
3226 err_destroy_cq:
3227         mlx5e_destroy_cq(cq);
3228
3229 err_free_cq:
3230         mlx5e_free_cq(cq);
3231
3232         return err;
3233 }
3234
3235 void mlx5e_close_drop_rq(struct mlx5e_rq *drop_rq)
3236 {
3237         mlx5e_destroy_rq(drop_rq);
3238         mlx5e_free_drop_rq(drop_rq);
3239         mlx5e_destroy_cq(&drop_rq->cq);
3240         mlx5e_free_cq(&drop_rq->cq);
3241 }
3242
3243 int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
3244 {
3245         void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
3246
3247         MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
3248
3249         if (MLX5_GET(tisc, tisc, tls_en))
3250                 MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
3251
3252         if (mlx5_lag_is_lacp_owner(mdev))
3253                 MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
3254
3255         return mlx5_core_create_tis(mdev, in, tisn);
3256 }
3257
3258 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
3259 {
3260         mlx5_core_destroy_tis(mdev, tisn);
3261 }
3262
3263 void mlx5e_destroy_tises(struct mlx5e_priv *priv)
3264 {
3265         int tc, i;
3266
3267         for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++)
3268                 for (tc = 0; tc < priv->profile->max_tc; tc++)
3269                         mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
3270 }
3271
3272 static bool mlx5e_lag_should_assign_affinity(struct mlx5_core_dev *mdev)
3273 {
3274         return MLX5_CAP_GEN(mdev, lag_tx_port_affinity) && mlx5e_get_num_lag_ports(mdev) > 1;
3275 }
3276
3277 int mlx5e_create_tises(struct mlx5e_priv *priv)
3278 {
3279         int tc, i;
3280         int err;
3281
3282         for (i = 0; i < mlx5e_get_num_lag_ports(priv->mdev); i++) {
3283                 for (tc = 0; tc < priv->profile->max_tc; tc++) {
3284                         u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
3285                         void *tisc;
3286
3287                         tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
3288
3289                         MLX5_SET(tisc, tisc, prio, tc << 1);
3290
3291                         if (mlx5e_lag_should_assign_affinity(priv->mdev))
3292                                 MLX5_SET(tisc, tisc, lag_tx_port_affinity, i + 1);
3293
3294                         err = mlx5e_create_tis(priv->mdev, in, &priv->tisn[i][tc]);
3295                         if (err)
3296                                 goto err_close_tises;
3297                 }
3298         }
3299
3300         return 0;
3301
3302 err_close_tises:
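        /* Unwind: destroy the TISes created so far -- first the ones already
         * created in the current (partially built) LAG port row, then every
         * TIS in the rows that were fully created before it.
         */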
3303         for (; i >= 0; i--) {
3304                 for (tc--; tc >= 0; tc--)
3305                         mlx5e_destroy_tis(priv->mdev, priv->tisn[i][tc]);
3306                 tc = priv->profile->max_tc;
3307         }
3308
3309         return err;
3310 }
3311
3312 static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv)
3313 {
3314         if (priv->mqprio_rl) {
3315                 mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
3316                 mlx5e_mqprio_rl_free(priv->mqprio_rl);
3317                 priv->mqprio_rl = NULL;
3318         }
3319         mlx5e_accel_cleanup_tx(priv);
3320         mlx5e_destroy_tises(priv);
3321 }
3322
3323 static int mlx5e_modify_channels_vsd(struct mlx5e_channels *chs, bool vsd)
3324 {
3325         int err;
3326         int i;
3327
3328         for (i = 0; i < chs->num; i++) {
3329                 err = mlx5e_modify_rq_vsd(&chs->c[i]->rq, vsd);
3330                 if (err)
3331                         return err;
3332         }
3333         if (chs->ptp && test_bit(MLX5E_PTP_STATE_RX, chs->ptp->state))
3334                 return mlx5e_modify_rq_vsd(&chs->ptp->rq, vsd);
3335
3336         return 0;
3337 }
3338
3339 static void mlx5e_mqprio_build_default_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
3340                                                  int ntc, int nch)
3341 {
3342         int tc;
3343
3344         memset(tc_to_txq, 0, sizeof(*tc_to_txq) * TC_MAX_QUEUE);
3345
3346         /* Map netdev TCs to offset 0.
3347          * We have our own UP-to-TXQ mapping for the DCB mode of QoS.
3348          */
3349         for (tc = 0; tc < ntc; tc++) {
3350                 tc_to_txq[tc] = (struct netdev_tc_txq) {
3351                         .count = nch,
3352                         .offset = 0,
3353                 };
3354         }
3355 }
3356
3357 static void mlx5e_mqprio_build_tc_to_txq(struct netdev_tc_txq *tc_to_txq,
3358                                          struct tc_mqprio_qopt *qopt)
3359 {
3360         int tc;
3361
3362         for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
3363                 tc_to_txq[tc] = (struct netdev_tc_txq) {
3364                         .count = qopt->count[tc],
3365                         .offset = qopt->offset[tc],
3366                 };
3367         }
3368 }
3369
3370 static void mlx5e_params_mqprio_dcb_set(struct mlx5e_params *params, u8 num_tc)
3371 {
3372         params->mqprio.mode = TC_MQPRIO_MODE_DCB;
3373         params->mqprio.num_tc = num_tc;
3374         mlx5e_mqprio_build_default_tc_to_txq(params->mqprio.tc_to_txq, num_tc,
3375                                              params->num_channels);
3376 }
3377
3378 static void mlx5e_mqprio_rl_update_params(struct mlx5e_params *params,
3379                                           struct mlx5e_mqprio_rl *rl)
3380 {
3381         int tc;
3382
3383         for (tc = 0; tc < TC_MAX_QUEUE; tc++) {
3384                 u32 hw_id = 0;
3385
3386                 if (rl)
3387                         mlx5e_mqprio_rl_get_node_hw_id(rl, tc, &hw_id);
3388                 params->mqprio.channel.hw_id[tc] = hw_id;
3389         }
3390 }
3391
3392 static void mlx5e_params_mqprio_channel_set(struct mlx5e_params *params,
3393                                             struct tc_mqprio_qopt_offload *mqprio,
3394                                             struct mlx5e_mqprio_rl *rl)
3395 {
3396         int tc;
3397
3398         params->mqprio.mode = TC_MQPRIO_MODE_CHANNEL;
3399         params->mqprio.num_tc = mqprio->qopt.num_tc;
3400
3401         for (tc = 0; tc < TC_MAX_QUEUE; tc++)
3402                 params->mqprio.channel.max_rate[tc] = mqprio->max_rate[tc];
3403
3404         mlx5e_mqprio_rl_update_params(params, rl);
3405         mlx5e_mqprio_build_tc_to_txq(params->mqprio.tc_to_txq, &mqprio->qopt);
3406 }
3407
3408 static void mlx5e_params_mqprio_reset(struct mlx5e_params *params)
3409 {
3410         mlx5e_params_mqprio_dcb_set(params, 1);
3411 }
3412
3413 static int mlx5e_setup_tc_mqprio_dcb(struct mlx5e_priv *priv,
3414                                      struct tc_mqprio_qopt *mqprio)
3415 {
3416         struct mlx5e_params new_params;
3417         u8 tc = mqprio->num_tc;
3418         int err;
3419
3420         mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
3421
3422         if (tc && tc != MLX5E_MAX_NUM_TC)
3423                 return -EINVAL;
3424
3425         new_params = priv->channels.params;
3426         mlx5e_params_mqprio_dcb_set(&new_params, tc ? tc : 1);
3427
3428         err = mlx5e_safe_switch_params(priv, &new_params,
3429                                        mlx5e_num_channels_changed_ctx, NULL, true);
3430
3431         if (!err && priv->mqprio_rl) {
3432                 mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
3433                 mlx5e_mqprio_rl_free(priv->mqprio_rl);
3434                 priv->mqprio_rl = NULL;
3435         }
3436
3437         priv->max_opened_tc = max_t(u8, priv->max_opened_tc,
3438                                     mlx5e_get_dcb_num_tc(&priv->channels.params));
3439         return err;
3440 }
3441
3442 static int mlx5e_mqprio_channel_validate(struct mlx5e_priv *priv,
3443                                          struct tc_mqprio_qopt_offload *mqprio)
3444 {
3445         struct net_device *netdev = priv->netdev;
3446         struct mlx5e_ptp *ptp_channel;
3447         int agg_count = 0;
3448         int i;
3449
3450         ptp_channel = priv->channels.ptp;
3451         if (ptp_channel && test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state)) {
3452                 netdev_err(netdev,
3453                            "Cannot activate MQPRIO channel mode since it conflicts with TX port TS\n");
3454                 return -EINVAL;
3455         }
3456
3457         if (mqprio->qopt.offset[0] != 0 || mqprio->qopt.num_tc < 1 ||
3458             mqprio->qopt.num_tc > MLX5E_MAX_NUM_MQPRIO_CH_TC)
3459                 return -EINVAL;
3460
3461         for (i = 0; i < mqprio->qopt.num_tc; i++) {
3462                 if (!mqprio->qopt.count[i]) {
3463                         netdev_err(netdev, "Zero size for queue-group (%d) is not supported\n", i);
3464                         return -EINVAL;
3465                 }
3466                 if (mqprio->min_rate[i]) {
3467                         netdev_err(netdev, "Min tx rate is not supported\n");
3468                         return -EINVAL;
3469                 }
3470
3471                 if (mqprio->max_rate[i]) {
3472                         int err;
3473
3474                         err = mlx5e_qos_bytes_rate_check(priv->mdev, mqprio->max_rate[i]);
3475                         if (err)
3476                                 return err;
3477                 }
3478
3479                 if (mqprio->qopt.offset[i] != agg_count) {
3480                         netdev_err(netdev, "Discontinuous queues config is not supported\n");
3481                         return -EINVAL;
3482                 }
3483                 agg_count += mqprio->qopt.count[i];
3484         }
3485
3486         if (priv->channels.params.num_channels != agg_count) {
3487                 netdev_err(netdev, "Num of queues (%d) does not match available (%d)\n",
3488                            agg_count, priv->channels.params.num_channels);
3489                 return -EINVAL;
3490         }
3491
3492         return 0;
3493 }
3494
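/*
 * Editor's note -- worked example of the validation above (illustrative
 * numbers): with num_channels = 5 and qopt.num_tc = 2, count = {2, 3}
 * requires offset = {0, 2} (starting at 0, contiguous, summing to 5).
 * offset = {0, 3} is rejected as discontinuous, and count = {2, 2} is
 * rejected because only 4 of the 5 channels would be covered.
 */
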
3495 static bool mlx5e_mqprio_rate_limit(u8 num_tc, u64 max_rate[])
3496 {
3497         int tc;
3498
3499         for (tc = 0; tc < num_tc; tc++)
3500                 if (max_rate[tc])
3501                         return true;
3502         return false;
3503 }
3504
3505 static struct mlx5e_mqprio_rl *mlx5e_mqprio_rl_create(struct mlx5_core_dev *mdev,
3506                                                       u8 num_tc, u64 max_rate[])
3507 {
3508         struct mlx5e_mqprio_rl *rl;
3509         int err;
3510
3511         if (!mlx5e_mqprio_rate_limit(num_tc, max_rate))
3512                 return NULL;
3513
3514         rl = mlx5e_mqprio_rl_alloc();
3515         if (!rl)
3516                 return ERR_PTR(-ENOMEM);
3517
3518         err = mlx5e_mqprio_rl_init(rl, mdev, num_tc, max_rate);
3519         if (err) {
3520                 mlx5e_mqprio_rl_free(rl);
3521                 return ERR_PTR(err);
3522         }
3523
3524         return rl;
3525 }
3526
3527 static int mlx5e_setup_tc_mqprio_channel(struct mlx5e_priv *priv,
3528                                          struct tc_mqprio_qopt_offload *mqprio)
3529 {
3530         mlx5e_fp_preactivate preactivate;
3531         struct mlx5e_params new_params;
3532         struct mlx5e_mqprio_rl *rl;
3533         bool nch_changed;
3534         int err;
3535
3536         err = mlx5e_mqprio_channel_validate(priv, mqprio);
3537         if (err)
3538                 return err;
3539
3540         rl = mlx5e_mqprio_rl_create(priv->mdev, mqprio->qopt.num_tc, mqprio->max_rate);
3541         if (IS_ERR(rl))
3542                 return PTR_ERR(rl);
3543
3544         new_params = priv->channels.params;
3545         mlx5e_params_mqprio_channel_set(&new_params, mqprio, rl);
3546
3547         nch_changed = mlx5e_get_dcb_num_tc(&priv->channels.params) > 1;
3548         preactivate = nch_changed ? mlx5e_num_channels_changed_ctx :
3549                 mlx5e_update_netdev_queues_ctx;
3550         err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, true);
3551         if (err) {
3552                 if (rl) {
3553                         mlx5e_mqprio_rl_cleanup(rl);
3554                         mlx5e_mqprio_rl_free(rl);
3555                 }
3556                 return err;
3557         }
3558
3559         if (priv->mqprio_rl) {
3560                 mlx5e_mqprio_rl_cleanup(priv->mqprio_rl);
3561                 mlx5e_mqprio_rl_free(priv->mqprio_rl);
3562         }
3563         priv->mqprio_rl = rl;
3564
3565         return 0;
3566 }
3567
3568 static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv,
3569                                  struct tc_mqprio_qopt_offload *mqprio)
3570 {
3571         /* MQPRIO is another top-level qdisc that can't be attached
3572          * simultaneously with the offloaded HTB.
3573          */
3574         if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq)))
3575                 return -EINVAL;
3576
3577         switch (mqprio->mode) {
3578         case TC_MQPRIO_MODE_DCB:
3579                 return mlx5e_setup_tc_mqprio_dcb(priv, &mqprio->qopt);
3580         case TC_MQPRIO_MODE_CHANNEL:
3581                 return mlx5e_setup_tc_mqprio_channel(priv, mqprio);
3582         default:
3583                 return -EOPNOTSUPP;
3584         }
3585 }
3586
3587 static LIST_HEAD(mlx5e_block_cb_list);
3588
3589 static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
3590                           void *type_data)
3591 {
3592         struct mlx5e_priv *priv = netdev_priv(dev);
3593         bool tc_unbind = false;
3594         int err;
3595
3596         if (type == TC_SETUP_BLOCK &&
3597             ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND)
3598                 tc_unbind = true;
3599
3600         if (!netif_device_present(dev) && !tc_unbind)
3601                 return -ENODEV;
3602
3603         switch (type) {
3604         case TC_SETUP_BLOCK: {
3605                 struct flow_block_offload *f = type_data;
3606
3607                 f->unlocked_driver_cb = true;
3608                 return flow_block_cb_setup_simple(type_data,
3609                                                   &mlx5e_block_cb_list,
3610                                                   mlx5e_setup_tc_block_cb,
3611                                                   priv, priv, true);
3612         }
3613         case TC_SETUP_QDISC_MQPRIO:
3614                 mutex_lock(&priv->state_lock);
3615                 err = mlx5e_setup_tc_mqprio(priv, type_data);
3616                 mutex_unlock(&priv->state_lock);
3617                 return err;
3618         case TC_SETUP_QDISC_HTB:
3619                 mutex_lock(&priv->state_lock);
3620                 err = mlx5e_htb_setup_tc(priv, type_data);
3621                 mutex_unlock(&priv->state_lock);
3622                 return err;
3623         default:
3624                 return -EOPNOTSUPP;
3625         }
3626 }
3627
3628 void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
3629 {
3630         int i;
3631
3632         for (i = 0; i < priv->stats_nch; i++) {
3633                 struct mlx5e_channel_stats *channel_stats = priv->channel_stats[i];
3634                 struct mlx5e_rq_stats *xskrq_stats = &channel_stats->xskrq;
3635                 struct mlx5e_rq_stats *rq_stats = &channel_stats->rq;
3636                 int j;
3637
3638                 s->rx_packets   += rq_stats->packets + xskrq_stats->packets;
3639                 s->rx_bytes     += rq_stats->bytes + xskrq_stats->bytes;
3640                 s->multicast    += rq_stats->mcast_packets + xskrq_stats->mcast_packets;
3641
3642                 for (j = 0; j < priv->max_opened_tc; j++) {
3643                         struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
3644
3645                         s->tx_packets    += sq_stats->packets;
3646                         s->tx_bytes      += sq_stats->bytes;
3647                         s->tx_dropped    += sq_stats->dropped;
3648                 }
3649         }
3650         if (priv->tx_ptp_opened) {
3651                 for (i = 0; i < priv->max_opened_tc; i++) {
3652                         struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i];
3653
3654                         s->tx_packets    += sq_stats->packets;
3655                         s->tx_bytes      += sq_stats->bytes;
3656                         s->tx_dropped    += sq_stats->dropped;
3657                 }
3658         }
3659         if (priv->rx_ptp_opened) {
3660                 struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
3661
3662                 s->rx_packets   += rq_stats->packets;
3663                 s->rx_bytes     += rq_stats->bytes;
3664                 s->multicast    += rq_stats->mcast_packets;
3665         }
3666 }
3667
3668 void
3669 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
3670 {
3671         struct mlx5e_priv *priv = netdev_priv(dev);
3672         struct mlx5e_pport_stats *pstats = &priv->stats.pport;
3673
3674         if (!netif_device_present(dev))
3675                 return;
3676
3677         /* In switchdev mode, the monitor counters don't cover the
3678          * 802_3 rx/tx stats, so the update-stats mechanism must keep
3679          * the 802_3 layout counters updated.
3680          */
3681         if (!mlx5e_monitor_counter_supported(priv) ||
3682             mlx5e_is_uplink_rep(priv)) {
3683                 /* update HW stats in background for next time */
3684                 mlx5e_queue_update_stats(priv);
3685         }
3686
3687         if (mlx5e_is_uplink_rep(priv)) {
3688                 struct mlx5e_vport_stats *vstats = &priv->stats.vport;
3689
3690                 stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
3691                 stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
3692                 stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
3693                 stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
3694
3695                 /* vport multicast also counts packets that are dropped due to steering
3696                  * or rx out of buffer
3697                  */
3698                 stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
3699         } else {
3700                 mlx5e_fold_sw_stats64(priv, stats);
3701         }
3702
3703         stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
3704
3705         stats->rx_length_errors =
3706                 PPORT_802_3_GET(pstats, a_in_range_length_errors) +
3707                 PPORT_802_3_GET(pstats, a_out_of_range_length_field) +
3708                 PPORT_802_3_GET(pstats, a_frame_too_long_errors) +
3709                 VNIC_ENV_GET(&priv->stats.vnic, eth_wqe_too_small);
3710         stats->rx_crc_errors =
3711                 PPORT_802_3_GET(pstats, a_frame_check_sequence_errors);
3712         stats->rx_frame_errors = PPORT_802_3_GET(pstats, a_alignment_errors);
3713         stats->tx_aborted_errors = PPORT_2863_GET(pstats, if_out_discards);
3714         stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
3715                            stats->rx_frame_errors;
3716         stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
3717 }
3718
3719 static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv)
3720 {
3721         if (mlx5e_is_uplink_rep(priv))
3722                 return; /* no rx mode for uplink rep */
3723
3724         queue_work(priv->wq, &priv->set_rx_mode_work);
3725 }
3726
3727 static void mlx5e_set_rx_mode(struct net_device *dev)
3728 {
3729         struct mlx5e_priv *priv = netdev_priv(dev);
3730
3731         mlx5e_nic_set_rx_mode(priv);
3732 }
3733
3734 static int mlx5e_set_mac(struct net_device *netdev, void *addr)
3735 {
3736         struct mlx5e_priv *priv = netdev_priv(netdev);
3737         struct sockaddr *saddr = addr;
3738
3739         if (!is_valid_ether_addr(saddr->sa_data))
3740                 return -EADDRNOTAVAIL;
3741
3742         netif_addr_lock_bh(netdev);
3743         eth_hw_addr_set(netdev, saddr->sa_data);
3744         netif_addr_unlock_bh(netdev);
3745
3746         mlx5e_nic_set_rx_mode(priv);
3747
3748         return 0;
3749 }
3750
3751 #define MLX5E_SET_FEATURE(features, feature, enable)    \
3752         do {                                            \
3753                 if (enable)                             \
3754                         *features |= feature;           \
3755                 else                                    \
3756                         *features &= ~feature;          \
3757         } while (0)
3758
3759 typedef int (*mlx5e_feature_handler)(struct net_device *netdev, bool enable);
3760
3761 static int set_feature_lro(struct net_device *netdev, bool enable)
3762 {
3763         struct mlx5e_priv *priv = netdev_priv(netdev);
3764         struct mlx5_core_dev *mdev = priv->mdev;
3765         struct mlx5e_params *cur_params;
3766         struct mlx5e_params new_params;
3767         bool reset = true;
3768         int err = 0;
3769
3770         mutex_lock(&priv->state_lock);
3771
3772         cur_params = &priv->channels.params;
3773         new_params = *cur_params;
3774
3775         if (enable)
3776                 new_params.packet_merge.type = MLX5E_PACKET_MERGE_LRO;
3777         else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)
3778                 new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
3779         else
3780                 goto out;
3781
3782         if (!(cur_params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO &&
3783               new_params.packet_merge.type == MLX5E_PACKET_MERGE_LRO)) {
3784                 if (cur_params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {
3785                         if (mlx5e_rx_mpwqe_is_linear_skb(mdev, cur_params, NULL) ==
3786                             mlx5e_rx_mpwqe_is_linear_skb(mdev, &new_params, NULL))
3787                                 reset = false;
3788                 }
3789         }
3790
3791         err = mlx5e_safe_switch_params(priv, &new_params,
3792                                        mlx5e_modify_tirs_packet_merge_ctx, NULL, reset);
3793 out:
3794         mutex_unlock(&priv->state_lock);
3795         return err;
3796 }
3797
3798 static int set_feature_hw_gro(struct net_device *netdev, bool enable)
3799 {
3800         struct mlx5e_priv *priv = netdev_priv(netdev);
3801         struct mlx5e_params new_params;
3802         bool reset = true;
3803         int err = 0;
3804
3805         mutex_lock(&priv->state_lock);
3806         new_params = priv->channels.params;
3807
3808         if (enable) {
3809                 new_params.packet_merge.type = MLX5E_PACKET_MERGE_SHAMPO;
3810                 new_params.packet_merge.shampo.match_criteria_type =
3811                         MLX5_RQC_SHAMPO_MATCH_CRITERIA_TYPE_EXTENDED;
3812                 new_params.packet_merge.shampo.alignment_granularity =
3813                         MLX5_RQC_SHAMPO_NO_MATCH_ALIGNMENT_GRANULARITY_STRIDE;
3814         } else if (new_params.packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) {
3815                 new_params.packet_merge.type = MLX5E_PACKET_MERGE_NONE;
3816         } else {
3817                 goto out;
3818         }
3819
3820         err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
3821 out:
3822         mutex_unlock(&priv->state_lock);
3823         return err;
3824 }
3825
3826 static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
3827 {
3828         struct mlx5e_priv *priv = netdev_priv(netdev);
3829
3830         if (enable)
3831                 mlx5e_enable_cvlan_filter(priv->fs,
3832                                           !!(priv->netdev->flags & IFF_PROMISC));
3833         else
3834                 mlx5e_disable_cvlan_filter(priv->fs,
3835                                            !!(priv->netdev->flags & IFF_PROMISC));
3836
3837         return 0;
3838 }
3839
3840 static int set_feature_hw_tc(struct net_device *netdev, bool enable)
3841 {
3842         struct mlx5e_priv *priv = netdev_priv(netdev);
3843         int err = 0;
3844
3845 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
3846         int tc_flag = mlx5e_is_uplink_rep(priv) ? MLX5_TC_FLAG(ESW_OFFLOAD) :
3847                                                   MLX5_TC_FLAG(NIC_OFFLOAD);
3848         if (!enable && mlx5e_tc_num_filters(priv, tc_flag)) {
3849                 netdev_err(netdev,
3850                            "Active offloaded tc filters, can't turn hw_tc_offload off\n");
3851                 return -EINVAL;
3852         }
3853 #endif
3854
3855         mutex_lock(&priv->state_lock);
3856         if (!enable && mlx5e_selq_is_htb_enabled(&priv->selq)) {
3857                 netdev_err(netdev, "Active HTB offload, can't turn hw_tc_offload off\n");
3858                 err = -EINVAL;
3859         }
3860         mutex_unlock(&priv->state_lock);
3861
3862         return err;
3863 }
3864
3865 static int set_feature_rx_all(struct net_device *netdev, bool enable)
3866 {
3867         struct mlx5e_priv *priv = netdev_priv(netdev);
3868         struct mlx5_core_dev *mdev = priv->mdev;
3869
3870         return mlx5_set_port_fcs(mdev, !enable);
3871 }
3872
3873 static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable)
3874 {
3875         u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {};
3876         bool supported, curr_state;
3877         int err;
3878
3879         if (!MLX5_CAP_GEN(mdev, ports_check))
3880                 return 0;
3881
3882         err = mlx5_query_ports_check(mdev, in, sizeof(in));
3883         if (err)
3884                 return err;
3885
3886         supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap);
3887         curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc);
3888
3889         if (!supported || enable == curr_state)
3890                 return 0;
3891
3892         MLX5_SET(pcmr_reg, in, local_port, 1);
3893         MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable);
3894
3895         return mlx5_set_ports_check(mdev, in, sizeof(in));
3896 }
3897
3898 static int mlx5e_set_rx_port_ts_wrap(struct mlx5e_priv *priv, void *ctx)
3899 {
3900         struct mlx5_core_dev *mdev = priv->mdev;
3901         bool enable = *(bool *)ctx;
3902
3903         return mlx5e_set_rx_port_ts(mdev, enable);
3904 }
3905
3906 static int set_feature_rx_fcs(struct net_device *netdev, bool enable)
3907 {
3908         struct mlx5e_priv *priv = netdev_priv(netdev);
3909         struct mlx5e_channels *chs = &priv->channels;
3910         struct mlx5e_params new_params;
3911         int err;
3912
3913         mutex_lock(&priv->state_lock);
3914
3915         new_params = chs->params;
3916         new_params.scatter_fcs_en = enable;
3917         err = mlx5e_safe_switch_params(priv, &new_params, mlx5e_set_rx_port_ts_wrap,
3918                                        &new_params.scatter_fcs_en, true);
3919         mutex_unlock(&priv->state_lock);
3920         return err;
3921 }
3922
3923 static int set_feature_rx_vlan(struct net_device *netdev, bool enable)
3924 {
3925         struct mlx5e_priv *priv = netdev_priv(netdev);
3926         int err = 0;
3927
3928         mutex_lock(&priv->state_lock);
3929
3930         mlx5e_fs_set_vlan_strip_disable(priv->fs, !enable);
3931         priv->channels.params.vlan_strip_disable = !enable;
3932
3933         if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
3934                 goto unlock;
3935
3936         err = mlx5e_modify_channels_vsd(&priv->channels, !enable);
3937         if (err) {
3938                 mlx5e_fs_set_vlan_strip_disable(priv->fs, enable);
3939                 priv->channels.params.vlan_strip_disable = enable;
3940         }
3941 unlock:
3942         mutex_unlock(&priv->state_lock);
3943
3944         return err;
3945 }
3946
3947 int mlx5e_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
3948 {
3949         struct mlx5e_priv *priv = netdev_priv(dev);
3950         struct mlx5e_flow_steering *fs = priv->fs;
3951
3952         if (mlx5e_is_uplink_rep(priv))
3953                 return 0; /* no vlan table for uplink rep */
3954
3955         return mlx5e_fs_vlan_rx_add_vid(fs, dev, proto, vid);
3956 }
3957
3958 int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
3959 {
3960         struct mlx5e_priv *priv = netdev_priv(dev);
3961         struct mlx5e_flow_steering *fs = priv->fs;
3962
3963         if (mlx5e_is_uplink_rep(priv))
3964                 return 0; /* no vlan table for uplink rep */
3965
3966         return mlx5e_fs_vlan_rx_kill_vid(fs, dev, proto, vid);
3967 }
3968
3969 #ifdef CONFIG_MLX5_EN_ARFS
3970 static int set_feature_arfs(struct net_device *netdev, bool enable)
3971 {
3972         struct mlx5e_priv *priv = netdev_priv(netdev);
3973         int err;
3974
3975         if (enable)
3976                 err = mlx5e_arfs_enable(priv->fs);
3977         else
3978                 err = mlx5e_arfs_disable(priv->fs);
3979
3980         return err;
3981 }
3982 #endif
3983
3984 static int mlx5e_handle_feature(struct net_device *netdev,
3985                                 netdev_features_t *features,
3986                                 netdev_features_t feature,
3987                                 mlx5e_feature_handler feature_handler)
3988 {
3989         netdev_features_t changes = *features ^ netdev->features;
3990         bool enable = !!(*features & feature);
3991         int err;
3992
3993         if (!(changes & feature))
3994                 return 0;
3995
3996         err = feature_handler(netdev, enable);
3997         if (err) {
3998                 MLX5E_SET_FEATURE(features, feature, !enable);
3999                 netdev_err(netdev, "%s feature %pNF failed, err %d\n",
4000                            enable ? "Enable" : "Disable", &feature, err);
4001                 return err;
4002         }
4003
4004         return 0;
4005 }
4006
4007 int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
4008 {
4009         netdev_features_t oper_features = features;
4010         int err = 0;
4011
4012 #define MLX5E_HANDLE_FEATURE(feature, handler) \
4013         mlx5e_handle_feature(netdev, &oper_features, feature, handler)
4014
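        /* Errors are OR-ed so that every handler still runs; on failure a
         * handler's bit is flipped back in oper_features (see
         * mlx5e_handle_feature()), which is then written to
         * netdev->features below.
         */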
4015         err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
4016         err |= MLX5E_HANDLE_FEATURE(NETIF_F_GRO_HW, set_feature_hw_gro);
4017         err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
4018                                     set_feature_cvlan_filter);
4019         err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_hw_tc);
4020         err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
4021         err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXFCS, set_feature_rx_fcs);
4022         err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_RX, set_feature_rx_vlan);
4023 #ifdef CONFIG_MLX5_EN_ARFS
4024         err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
4025 #endif
4026         err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx);
4027
4028         if (err) {
4029                 netdev->features = oper_features;
4030                 return -EINVAL;
4031         }
4032
4033         return 0;
4034 }
4035
4036 static netdev_features_t mlx5e_fix_uplink_rep_features(struct net_device *netdev,
4037                                                        netdev_features_t features)
4038 {
4039         features &= ~NETIF_F_HW_TLS_RX;
4040         if (netdev->features & NETIF_F_HW_TLS_RX)
4041                 netdev_warn(netdev, "Disabling hw_tls_rx, not supported in switchdev mode\n");
4042
4043         features &= ~NETIF_F_HW_TLS_TX;
4044         if (netdev->features & NETIF_F_HW_TLS_TX)
4045                 netdev_warn(netdev, "Disabling hw_tls_tx, not supported in switchdev mode\n");
4046
4047         features &= ~NETIF_F_NTUPLE;
4048         if (netdev->features & NETIF_F_NTUPLE)
4049                 netdev_warn(netdev, "Disabling ntuple, not supported in switchdev mode\n");
4050
4051         features &= ~NETIF_F_GRO_HW;
4052         if (netdev->features & NETIF_F_GRO_HW)
4053                 netdev_warn(netdev, "Disabling HW_GRO, not supported in switchdev mode\n");
4054
4055         features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
4056         if (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
4057                 netdev_warn(netdev, "Disabling HW_VLAN CTAG FILTERING, not supported in switchdev mode\n");
4058
4059         return features;
4060 }
4061
4062 static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
4063                                             netdev_features_t features)
4064 {
4065         struct mlx5e_priv *priv = netdev_priv(netdev);
4066         struct mlx5e_vlan_table *vlan;
4067         struct mlx5e_params *params;
4068
4069         if (!netif_device_present(netdev))
4070                 return features;
4071
4072         vlan = mlx5e_fs_get_vlan(priv->fs);
4073         mutex_lock(&priv->state_lock);
4074         params = &priv->channels.params;
4075         if (!vlan ||
4076             !bitmap_empty(mlx5e_vlan_get_active_svlans(vlan), VLAN_N_VID)) {
4077                 /* HW strips the outer C-tag header, which is a problem
4078                  * for S-tag traffic.
4079                  */
4080                 features &= ~NETIF_F_HW_VLAN_CTAG_RX;
4081                 if (!params->vlan_strip_disable)
4082                         netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n");
4083         }
4084
4085         if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
4086                 if (features & NETIF_F_LRO) {
4087                         netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n");
4088                         features &= ~NETIF_F_LRO;
4089                 }
4090                 if (features & NETIF_F_GRO_HW) {
4091                         netdev_warn(netdev, "Disabling HW-GRO, not supported in legacy RQ\n");
4092                         features &= ~NETIF_F_GRO_HW;
4093                 }
4094         }
4095
4096         if (params->xdp_prog) {
4097                 if (features & NETIF_F_LRO) {
4098                         netdev_warn(netdev, "LRO is incompatible with XDP\n");
4099                         features &= ~NETIF_F_LRO;
4100                 }
4101                 if (features & NETIF_F_GRO_HW) {
4102                         netdev_warn(netdev, "HW GRO is incompatible with XDP\n");
4103                         features &= ~NETIF_F_GRO_HW;
4104                 }
4105         }
4106
4107         if (priv->xsk.refcnt) {
4108                 if (features & NETIF_F_LRO) {
4109                         netdev_warn(netdev, "LRO is incompatible with AF_XDP (%u XSKs are active)\n",
4110                                     priv->xsk.refcnt);
4111                         features &= ~NETIF_F_LRO;
4112                 }
4113                 if (features & NETIF_F_GRO_HW) {
4114                         netdev_warn(netdev, "HW GRO is incompatible with AF_XDP (%u XSKs are active)\n",
4115                                     priv->xsk.refcnt);
4116                         features &= ~NETIF_F_GRO_HW;
4117                 }
4118         }
4119
4120         if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
4121                 features &= ~NETIF_F_RXHASH;
4122                 if (netdev->features & NETIF_F_RXHASH)
4123                         netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
4124
4125                 if (features & NETIF_F_GRO_HW) {
4126                         netdev_warn(netdev, "Disabling HW-GRO, not supported when CQE compress is active\n");
4127                         features &= ~NETIF_F_GRO_HW;
4128                 }
4129         }
4130
4131         if (mlx5e_is_uplink_rep(priv))
4132                 features = mlx5e_fix_uplink_rep_features(netdev, features);
4133
4134         mutex_unlock(&priv->state_lock);
4135
4136         return features;
4137 }
4138
4139 static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
4140                                    struct mlx5e_channels *chs,
4141                                    struct mlx5e_params *new_params,
4142                                    struct mlx5_core_dev *mdev)
4143 {
4144         u16 ix;
4145
4146         for (ix = 0; ix < chs->params.num_channels; ix++) {
4147                 struct xsk_buff_pool *xsk_pool =
4148                         mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
4149                 struct mlx5e_xsk_param xsk;
4150
4151                 if (!xsk_pool)
4152                         continue;
4153
4154                 mlx5e_build_xsk_param(xsk_pool, &xsk);
4155
4156                 if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
4157                         u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
4158                         int max_mtu_frame, max_mtu_page, max_mtu;
4159
4160                         /* Two criteria must be met:
4161                          * 1. HW MTU + all headrooms <= XSK frame size.
4162                          * 2. Size of SKBs allocated on XDP_PASS <= PAGE_SIZE.
4163                          */
4164                         max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
4165                         max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0));
4166                         max_mtu = min(max_mtu_frame, max_mtu_page);
4167
4168                         netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n",
4169                                    new_params->sw_mtu, ix, max_mtu);
4170                         return false;
4171                 }
4172         }
4173
4174         return true;
4175 }
4176
4177 static bool mlx5e_params_validate_xdp(struct net_device *netdev,
4178                                       struct mlx5_core_dev *mdev,
4179                                       struct mlx5e_params *params)
4180 {
4181         bool is_linear;
4182
4183         /* No XSK params: AF_XDP can't be enabled yet at the point of setting
4184          * the XDP program.
4185          */
4186         is_linear = mlx5e_rx_is_linear_skb(mdev, params, NULL);
4187
4188         if (!is_linear && params->rq_wq_type != MLX5_WQ_TYPE_CYCLIC) {
4189                 netdev_warn(netdev, "XDP is not allowed with striding RQ and MTU(%d) > %d\n",
4190                             params->sw_mtu,
4191                             mlx5e_xdp_max_mtu(params, NULL));
4192                 return false;
4193         }
4194         if (!is_linear && !params->xdp_prog->aux->xdp_has_frags) {
4195                 netdev_warn(netdev, "MTU(%d) > %d, too big for an XDP program not aware of multi buffer\n",
4196                             params->sw_mtu,
4197                             mlx5e_xdp_max_mtu(params, NULL));
4198                 return false;
4199         }
4200
4201         return true;
4202 }
4203
4204 int mlx5e_change_mtu(struct net_device *netdev, int new_mtu,
4205                      mlx5e_fp_preactivate preactivate)
4206 {
4207         struct mlx5e_priv *priv = netdev_priv(netdev);
4208         struct mlx5e_params new_params;
4209         struct mlx5e_params *params;
4210         bool reset = true;
4211         int err = 0;
4212
4213         mutex_lock(&priv->state_lock);
4214
4215         params = &priv->channels.params;
4216
4217         new_params = *params;
4218         new_params.sw_mtu = new_mtu;
4219         err = mlx5e_validate_params(priv->mdev, &new_params);
4220         if (err)
4221                 goto out;
4222
4223         if (new_params.xdp_prog && !mlx5e_params_validate_xdp(netdev, priv->mdev,
4224                                                               &new_params)) {
4225                 err = -EINVAL;
4226                 goto out;
4227         }
4228
4229         if (priv->xsk.refcnt &&
4230             !mlx5e_xsk_validate_mtu(netdev, &priv->channels,
4231                                     &new_params, priv->mdev)) {
4232                 err = -EINVAL;
4233                 goto out;
4234         }
4235
4236         if (params->packet_merge.type == MLX5E_PACKET_MERGE_LRO)
4237                 reset = false;
4238
4239         if (params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
4240             params->packet_merge.type != MLX5E_PACKET_MERGE_SHAMPO) {
4241                 bool is_linear_old = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev, params, NULL);
4242                 bool is_linear_new = mlx5e_rx_mpwqe_is_linear_skb(priv->mdev,
4243                                                                   &new_params, NULL);
4244                 u8 sz_old = mlx5e_mpwqe_get_log_rq_size(priv->mdev, params, NULL);
4245                 u8 sz_new = mlx5e_mpwqe_get_log_rq_size(priv->mdev, &new_params, NULL);
4246
4247                 /* Always reset in linear mode - hw_mtu is used in data path.
4248                  * Check that the mode was non-linear and didn't change.
4249                  * If XSK is active, XSK RQs are linear.
4250                  * Reset if the RQ size changed, even if it's non-linear.
4251                  */
4252                 if (!is_linear_old && !is_linear_new && !priv->xsk.refcnt &&
4253                     sz_old == sz_new)
4254                         reset = false;
4255         }
4256
4257         err = mlx5e_safe_switch_params(priv, &new_params, preactivate, NULL, reset);
4258
4259 out:
4260         netdev->mtu = params->sw_mtu;
4261         mutex_unlock(&priv->state_lock);
4262         return err;
4263 }
4264
4265 static int mlx5e_change_nic_mtu(struct net_device *netdev, int new_mtu)
4266 {
4267         return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
4268 }
4269
4270 int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
4271 {
4272         bool set  = *(bool *)ctx;
4273
4274         return mlx5e_ptp_rx_manage_fs(priv, set);
4275 }
4276
4277 static int mlx5e_hwstamp_config_no_ptp_rx(struct mlx5e_priv *priv, bool rx_filter)
4278 {
4279         bool rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
4280         int err;
4281
4282         if (!rx_filter)
4283                 /* Reset CQE compression to Admin default */
4284                 return mlx5e_modify_rx_cqe_compression_locked(priv, rx_cqe_compress_def, false);
4285
4286         if (!MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
4287                 return 0;
4288
4289         /* Disable CQE compression */
4290         netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
4291         err = mlx5e_modify_rx_cqe_compression_locked(priv, false, true);
4292         if (err)
4293                 netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
4294
4295         return err;
4296 }
4297
4298 static int mlx5e_hwstamp_config_ptp_rx(struct mlx5e_priv *priv, bool ptp_rx)
4299 {
4300         struct mlx5e_params new_params;
4301
4302         if (ptp_rx == priv->channels.params.ptp_rx)
4303                 return 0;
4304
4305         new_params = priv->channels.params;
4306         new_params.ptp_rx = ptp_rx;
4307         return mlx5e_safe_switch_params(priv, &new_params, mlx5e_ptp_rx_manage_fs_ctx,
4308                                         &new_params.ptp_rx, true);
4309 }
4310
4311 int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
4312 {
4313         struct hwtstamp_config config;
4314         bool rx_cqe_compress_def;
4315         bool ptp_rx;
4316         int err;
4317
4318         if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
4319             (mlx5_clock_get_ptp_index(priv->mdev) == -1))
4320                 return -EOPNOTSUPP;
4321
4322         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
4323                 return -EFAULT;
4324
4325         /* TX HW timestamp */
4326         switch (config.tx_type) {
4327         case HWTSTAMP_TX_OFF:
4328         case HWTSTAMP_TX_ON:
4329                 break;
4330         default:
4331                 return -ERANGE;
4332         }
4333
4334         mutex_lock(&priv->state_lock);
4335         rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
4336
4337         /* RX HW timestamp */
4338         switch (config.rx_filter) {
4339         case HWTSTAMP_FILTER_NONE:
4340                 ptp_rx = false;
4341                 break;
4342         case HWTSTAMP_FILTER_ALL:
4343         case HWTSTAMP_FILTER_SOME:
4344         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
4345         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
4346         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
4347         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
4348         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
4349         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
4350         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
4351         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
4352         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
4353         case HWTSTAMP_FILTER_PTP_V2_EVENT:
4354         case HWTSTAMP_FILTER_PTP_V2_SYNC:
4355         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
4356         case HWTSTAMP_FILTER_NTP_ALL:
4357                 config.rx_filter = HWTSTAMP_FILTER_ALL;
4358                 /* ptp_rx is set only when both HW timestamping and
4359                  * CQE compression are enabled.
4360                  */
4361                 ptp_rx = rx_cqe_compress_def;
4362                 break;
4363         default:
4364                 err = -ERANGE;
4365                 goto err_unlock;
4366         }
4367
4368         if (!mlx5e_profile_feature_cap(priv->profile, PTP_RX))
4369                 err = mlx5e_hwstamp_config_no_ptp_rx(priv,
4370                                                      config.rx_filter != HWTSTAMP_FILTER_NONE);
4371         else
4372                 err = mlx5e_hwstamp_config_ptp_rx(priv, ptp_rx);
4373         if (err)
4374                 goto err_unlock;
4375
4376         memcpy(&priv->tstamp, &config, sizeof(config));
4377         mutex_unlock(&priv->state_lock);
4378
4379         /* might need to fix some features */
4380         netdev_update_features(priv->netdev);
4381
4382         return copy_to_user(ifr->ifr_data, &config,
4383                             sizeof(config)) ? -EFAULT : 0;
4384 err_unlock:
4385         mutex_unlock(&priv->state_lock);
4386         return err;
4387 }
4388
4389 int mlx5e_hwstamp_get(struct mlx5e_priv *priv, struct ifreq *ifr)
4390 {
4391         struct hwtstamp_config *cfg = &priv->tstamp;
4392
4393         if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
4394                 return -EOPNOTSUPP;
4395
4396         return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
4397 }
4398
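/*
 * Editor's note -- illustrative sketch only (userspace side, not driver
 * code): the hwtstamp handlers above are reached through the SIOCSHWTSTAMP/
 * SIOCGHWTSTAMP ioctls dispatched in mlx5e_ioctl() below, e.g.:
 */
#if 0
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

/* 'sock' is any socket fd, e.g. from socket(AF_INET, SOCK_DGRAM, 0) */
static int example_enable_hwtstamp(int sock, const char *ifname)
{
        struct hwtstamp_config cfg = {
                .tx_type   = HWTSTAMP_TX_ON,
                .rx_filter = HWTSTAMP_FILTER_ALL,
        };
        struct ifreq ifr;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (char *)&cfg;

        return ioctl(sock, SIOCSHWTSTAMP, &ifr);
}
#endif
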
4399 static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
4400 {
4401         struct mlx5e_priv *priv = netdev_priv(dev);
4402
4403         switch (cmd) {
4404         case SIOCSHWTSTAMP:
4405                 return mlx5e_hwstamp_set(priv, ifr);
4406         case SIOCGHWTSTAMP:
4407                 return mlx5e_hwstamp_get(priv, ifr);
4408         default:
4409                 return -EOPNOTSUPP;
4410         }
4411 }
4412
4413 #ifdef CONFIG_MLX5_ESWITCH
4414 int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
4415 {
4416         struct mlx5e_priv *priv = netdev_priv(dev);
4417         struct mlx5_core_dev *mdev = priv->mdev;
4418
4419         return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac);
4420 }
4421
4422 static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
4423                              __be16 vlan_proto)
4424 {
4425         struct mlx5e_priv *priv = netdev_priv(dev);
4426         struct mlx5_core_dev *mdev = priv->mdev;
4427
4428         if (vlan_proto != htons(ETH_P_8021Q))
4429                 return -EPROTONOSUPPORT;
4430
4431         return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1,
4432                                            vlan, qos);
4433 }
4434
4435 static int mlx5e_set_vf_spoofchk(struct net_device *dev, int vf, bool setting)
4436 {
4437         struct mlx5e_priv *priv = netdev_priv(dev);
4438         struct mlx5_core_dev *mdev = priv->mdev;
4439
4440         return mlx5_eswitch_set_vport_spoofchk(mdev->priv.eswitch, vf + 1, setting);
4441 }
4442
4443 static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
4444 {
4445         struct mlx5e_priv *priv = netdev_priv(dev);
4446         struct mlx5_core_dev *mdev = priv->mdev;
4447
4448         return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
4449 }
4450
4451 int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
4452                       int max_tx_rate)
4453 {
4454         struct mlx5e_priv *priv = netdev_priv(dev);
4455         struct mlx5_core_dev *mdev = priv->mdev;
4456
4457         return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1,
4458                                            max_tx_rate, min_tx_rate);
4459 }
4460
4461 static int mlx5_vport_link2ifla(u8 esw_link)
4462 {
4463         switch (esw_link) {
4464         case MLX5_VPORT_ADMIN_STATE_DOWN:
4465                 return IFLA_VF_LINK_STATE_DISABLE;
4466         case MLX5_VPORT_ADMIN_STATE_UP:
4467                 return IFLA_VF_LINK_STATE_ENABLE;
4468         }
4469         return IFLA_VF_LINK_STATE_AUTO;
4470 }
4471
4472 static int mlx5_ifla_link2vport(u8 ifla_link)
4473 {
4474         switch (ifla_link) {
4475         case IFLA_VF_LINK_STATE_DISABLE:
4476                 return MLX5_VPORT_ADMIN_STATE_DOWN;
4477         case IFLA_VF_LINK_STATE_ENABLE:
4478                 return MLX5_VPORT_ADMIN_STATE_UP;
4479         }
4480         return MLX5_VPORT_ADMIN_STATE_AUTO;
4481 }
4482
4483 static int mlx5e_set_vf_link_state(struct net_device *dev, int vf,
4484                                    int link_state)
4485 {
4486         struct mlx5e_priv *priv = netdev_priv(dev);
4487         struct mlx5_core_dev *mdev = priv->mdev;
4488
4489         if (mlx5e_is_uplink_rep(priv))
4490                 return -EOPNOTSUPP;
4491
4492         return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
4493                                             mlx5_ifla_link2vport(link_state));
4494 }
4495
4496 int mlx5e_get_vf_config(struct net_device *dev,
4497                         int vf, struct ifla_vf_info *ivi)
4498 {
4499         struct mlx5e_priv *priv = netdev_priv(dev);
4500         struct mlx5_core_dev *mdev = priv->mdev;
4501         int err;
4502
4503         if (!netif_device_present(dev))
4504                 return -EOPNOTSUPP;
4505
4506         err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
4507         if (err)
4508                 return err;
4509         ivi->linkstate = mlx5_vport_link2ifla(ivi->linkstate);
4510         return 0;
4511 }
4512
4513 int mlx5e_get_vf_stats(struct net_device *dev,
4514                        int vf, struct ifla_vf_stats *vf_stats)
4515 {
4516         struct mlx5e_priv *priv = netdev_priv(dev);
4517         struct mlx5_core_dev *mdev = priv->mdev;
4518
4519         return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
4520                                             vf_stats);
4521 }
4522
4523 static bool
4524 mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
4525 {
4526         struct mlx5e_priv *priv = netdev_priv(dev);
4527
4528         if (!netif_device_present(dev))
4529                 return false;
4530
4531         if (!mlx5e_is_uplink_rep(priv))
4532                 return false;
4533
4534         return mlx5e_rep_has_offload_stats(dev, attr_id);
4535 }
4536
4537 static int
4538 mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
4539                         void *sp)
4540 {
4541         struct mlx5e_priv *priv = netdev_priv(dev);
4542
4543         if (!mlx5e_is_uplink_rep(priv))
4544                 return -EOPNOTSUPP;
4545
4546         return mlx5e_rep_get_offload_stats(attr_id, dev, sp);
4547 }
4548 #endif
4549
4550 static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type)
4551 {
4552         switch (proto_type) {
4553         case IPPROTO_GRE:
4554                 return MLX5_CAP_ETH(mdev, tunnel_stateless_gre);
4555         case IPPROTO_IPIP:
4556         case IPPROTO_IPV6:
4557                 return (MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip) ||
4558                         MLX5_CAP_ETH(mdev, tunnel_stateless_ip_over_ip_tx));
4559         default:
4560                 return false;
4561         }
4562 }
4563
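/* Inner protocols that can be offloaded inside GRE; MPLS-over-GRE needs a
 * dedicated capability.
 */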
4564 static bool mlx5e_gre_tunnel_inner_proto_offload_supported(struct mlx5_core_dev *mdev,
4565                                                            struct sk_buff *skb)
4566 {
4567         switch (skb->inner_protocol) {
4568         case htons(ETH_P_IP):
4569         case htons(ETH_P_IPV6):
4570         case htons(ETH_P_TEB):
4571                 return true;
4572         case htons(ETH_P_MPLS_UC):
4573         case htons(ETH_P_MPLS_MC):
4574                 return MLX5_CAP_ETH(mdev, tunnel_stateless_mpls_over_gre);
4575         }
4576         return false;
4577 }
4578
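/* Keep checksum/GSO features only when the HW offloads this tunnel type:
 * GRE (with a supported inner protocol), IP-in-IP, a known VXLAN/GENEVE UDP
 * port, or ESP when IPsec offload is enabled.
 */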
4579 static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
4580                                                      struct sk_buff *skb,
4581                                                      netdev_features_t features)
4582 {
4583         unsigned int offset = 0;
4584         struct udphdr *udph;
4585         u8 proto;
4586         u16 port;
4587
4588         switch (vlan_get_protocol(skb)) {
4589         case htons(ETH_P_IP):
4590                 proto = ip_hdr(skb)->protocol;
4591                 break;
4592         case htons(ETH_P_IPV6):
4593                 proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
4594                 break;
4595         default:
4596                 goto out;
4597         }
4598
4599         switch (proto) {
4600         case IPPROTO_GRE:
4601                 if (mlx5e_gre_tunnel_inner_proto_offload_supported(priv->mdev, skb))
4602                         return features;
4603                 break;
4604         case IPPROTO_IPIP:
4605         case IPPROTO_IPV6:
4606                 if (mlx5e_tunnel_proto_supported_tx(priv->mdev, IPPROTO_IPIP))
4607                         return features;
4608                 break;
4609         case IPPROTO_UDP:
4610                 udph = udp_hdr(skb);
4611                 port = be16_to_cpu(udph->dest);
4612
4613                 /* Check whether the UDP port is offloaded by the HW */
4614                 if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port))
4615                         return features;
4616
4617 #if IS_ENABLED(CONFIG_GENEVE)
4618                 /* Support Geneve offload for the default UDP port */
4619                 if (port == GENEVE_UDP_PORT && mlx5_geneve_tx_allowed(priv->mdev))
4620                         return features;
4621 #endif
4622                 break;
4623 #ifdef CONFIG_MLX5_EN_IPSEC
4624         case IPPROTO_ESP:
4625                 return mlx5e_ipsec_feature_check(skb, features);
4626 #endif
4627         }
4628
4629 out:
4630         /* Disable CSUM and GSO if the tunnel type or UDP dport is not offloaded by HW */
4631         return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
4632 }
4633
4634 netdev_features_t mlx5e_features_check(struct sk_buff *skb,
4635                                        struct net_device *netdev,
4636                                        netdev_features_t features)
4637 {
4638         struct mlx5e_priv *priv = netdev_priv(netdev);
4639
4640         features = vlan_features_check(skb, features);
4641         features = vxlan_features_check(skb, features);
4642
4643         /* Check whether the tunneled packet can be offloaded by the HW */
4644         if (skb->encapsulation &&
4645             (features & NETIF_F_CSUM_MASK || features & NETIF_F_GSO_MASK))
4646                 return mlx5e_tunnel_features_check(priv, skb, features);
4647
4648         return features;
4649 }
4650
4651 static void mlx5e_tx_timeout_work(struct work_struct *work)
4652 {
4653         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
4654                                                tx_timeout_work);
4655         struct net_device *netdev = priv->netdev;
4656         int i;
4657
4658         rtnl_lock();
4659         mutex_lock(&priv->state_lock);
4660
4661         if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
4662                 goto unlock;
4663
4664         for (i = 0; i < netdev->real_num_tx_queues; i++) {
4665                 struct netdev_queue *dev_queue =
4666                         netdev_get_tx_queue(netdev, i);
4667                 struct mlx5e_txqsq *sq = priv->txq2sq[i];
4668
4669                 if (!netif_xmit_stopped(dev_queue))
4670                         continue;
4671
4672                 if (mlx5e_reporter_tx_timeout(sq))
4673                         /* break if the recovery tried to reopen the channels */
4674                         break;
4675         }
4676
4677 unlock:
4678         mutex_unlock(&priv->state_lock);
4679         rtnl_unlock();
4680 }
4681
4682 static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
4683 {
4684         struct mlx5e_priv *priv = netdev_priv(dev);
4685
4686         netdev_err(dev, "TX timeout detected\n");
4687         queue_work(priv->wq, &priv->tx_timeout_work);
4688 }
4689
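/* XDP can only be enabled when HW-GRO/LRO is off and the resulting channel
 * parameters pass XDP validation.
 */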
4690 static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
4691 {
4692         struct net_device *netdev = priv->netdev;
4693         struct mlx5e_params new_params;
4694
4695         if (priv->channels.params.packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
4696                 netdev_warn(netdev, "can't set XDP while HW-GRO/LRO is on, disable them first\n");
4697                 return -EINVAL;
4698         }
4699
4700         new_params = priv->channels.params;
4701         new_params.xdp_prog = prog;
4702
4703         if (!mlx5e_params_validate_xdp(netdev, priv->mdev, &new_params))
4704                 return -EINVAL;
4705
4706         return 0;
4707 }
4708
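/* Swap the RQ XDP program under priv->state_lock and drop the reference to
 * the old program.
 */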
4709 static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog)
4710 {
4711         struct bpf_prog *old_prog;
4712
4713         old_prog = rcu_replace_pointer(rq->xdp_prog, prog,
4714                                        lockdep_is_held(&rq->priv->state_lock));
4715         if (old_prog)
4716                 bpf_prog_put(old_prog);
4717 }
4718
4719 static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
4720 {
4721         struct mlx5e_priv *priv = netdev_priv(netdev);
4722         struct mlx5e_params new_params;
4723         struct bpf_prog *old_prog;
4724         int err = 0;
4725         bool reset;
4726         int i;
4727
4728         mutex_lock(&priv->state_lock);
4729
4730         if (prog) {
4731                 err = mlx5e_xdp_allowed(priv, prog);
4732                 if (err)
4733                         goto unlock;
4734         }
4735
4736         /* no need for full reset when exchanging programs */
4737         reset = (!priv->channels.params.xdp_prog || !prog);
4738
4739         new_params = priv->channels.params;
4740         new_params.xdp_prog = prog;
4741
4742         /* XDP affects striding RQ parameters. Block XDP if striding RQ won't be
4743          * supported with the new parameters: if PAGE_SIZE is bigger than
4744          * MLX5_MPWQE_LOG_STRIDE_SZ_MAX, striding RQ can't be used, even though
4745          * the MTU is small enough for the linear mode, because XDP uses strides
4746          * of PAGE_SIZE on regular RQs.
4747          */
4748         if (reset && MLX5E_GET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ)) {
4749                 /* Checking for regular RQs here; XSK RQs were checked on XSK bind. */
4750                 err = mlx5e_mpwrq_validate_regular(priv->mdev, &new_params);
4751                 if (err)
4752                         goto unlock;
4753         }
4754
4755         old_prog = priv->channels.params.xdp_prog;
4756
4757         err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, reset);
4758         if (err)
4759                 goto unlock;
4760
4761         if (old_prog)
4762                 bpf_prog_put(old_prog);
4763
4764         if (reset) {
4765                 if (prog)
4766                         xdp_features_set_redirect_target(netdev, true);
4767                 else
4768                         xdp_features_clear_redirect_target(netdev);
4769         }
4770
4771         if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
4772                 goto unlock;
4773
4774         /* Exchanging programs without a reset; update the ref counts on
4775          * behalf of the channels' RQs here.
4776          */
4777         bpf_prog_add(prog, priv->channels.num);
4778         for (i = 0; i < priv->channels.num; i++) {
4779                 struct mlx5e_channel *c = priv->channels.c[i];
4780
4781                 mlx5e_rq_replace_xdp_prog(&c->rq, prog);
4782                 if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) {
4783                         bpf_prog_inc(prog);
4784                         mlx5e_rq_replace_xdp_prog(&c->xskrq, prog);
4785                 }
4786         }
4787
4788 unlock:
4789         mutex_unlock(&priv->state_lock);
4790
4791         /* Some netdev features may need to be re-evaluated after the XDP program change. */
4792         if (!err)
4793                 netdev_update_features(netdev);
4794
4795         return err;
4796 }
4797
4798 static int mlx5e_xdp(struct net_device *dev, struct netdev_bpf *xdp)
4799 {
4800         switch (xdp->command) {
4801         case XDP_SETUP_PROG:
4802                 return mlx5e_xdp_set(dev, xdp->prog);
4803         case XDP_SETUP_XSK_POOL:
4804                 return mlx5e_xsk_setup_pool(dev, xdp->xsk.pool,
4805                                             xdp->xsk.queue_id);
4806         default:
4807                 return -EINVAL;
4808         }
4809 }
4810
4811 #ifdef CONFIG_MLX5_ESWITCH
4812 static int mlx5e_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4813                                 struct net_device *dev, u32 filter_mask,
4814                                 int nlflags)
4815 {
4816         struct mlx5e_priv *priv = netdev_priv(dev);
4817         struct mlx5_core_dev *mdev = priv->mdev;
4818         u8 mode, setting;
4819         int err;
4820
4821         err = mlx5_eswitch_get_vepa(mdev->priv.eswitch, &setting);
4822         if (err)
4823                 return err;
4824         mode = setting ? BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB;
4825         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4826                                        mode,
4827                                        0, 0, nlflags, filter_mask, NULL);
4828 }
4829
4830 static int mlx5e_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4831                                 u16 flags, struct netlink_ext_ack *extack)
4832 {
4833         struct mlx5e_priv *priv = netdev_priv(dev);
4834         struct mlx5_core_dev *mdev = priv->mdev;
4835         struct nlattr *attr, *br_spec;
4836         u16 mode = BRIDGE_MODE_UNDEF;
4837         u8 setting;
4838         int rem;
4839
4840         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4841         if (!br_spec)
4842                 return -EINVAL;
4843
4844         nla_for_each_nested(attr, br_spec, rem) {
4845                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4846                         continue;
4847
4848                 if (nla_len(attr) < sizeof(mode))
4849                         return -EINVAL;
4850
4851                 mode = nla_get_u16(attr);
4852                 if (mode > BRIDGE_MODE_VEPA)
4853                         return -EINVAL;
4854
4855                 break;
4856         }
4857
4858         if (mode == BRIDGE_MODE_UNDEF)
4859                 return -EINVAL;
4860
4861         setting = (mode == BRIDGE_MODE_VEPA) ? 1 : 0;
4862         return mlx5_eswitch_set_vepa(mdev->priv.eswitch, setting);
4863 }
4864 #endif
4865
4866 const struct net_device_ops mlx5e_netdev_ops = {
4867         .ndo_open                = mlx5e_open,
4868         .ndo_stop                = mlx5e_close,
4869         .ndo_start_xmit          = mlx5e_xmit,
4870         .ndo_setup_tc            = mlx5e_setup_tc,
4871         .ndo_select_queue        = mlx5e_select_queue,
4872         .ndo_get_stats64         = mlx5e_get_stats,
4873         .ndo_set_rx_mode         = mlx5e_set_rx_mode,
4874         .ndo_set_mac_address     = mlx5e_set_mac,
4875         .ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
4876         .ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
4877         .ndo_set_features        = mlx5e_set_features,
4878         .ndo_fix_features        = mlx5e_fix_features,
4879         .ndo_change_mtu          = mlx5e_change_nic_mtu,
4880         .ndo_eth_ioctl           = mlx5e_ioctl,
4881         .ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
4882         .ndo_features_check      = mlx5e_features_check,
4883         .ndo_tx_timeout          = mlx5e_tx_timeout,
4884         .ndo_bpf                 = mlx5e_xdp,
4885         .ndo_xdp_xmit            = mlx5e_xdp_xmit,
4886         .ndo_xsk_wakeup          = mlx5e_xsk_wakeup,
4887 #ifdef CONFIG_MLX5_EN_ARFS
4888         .ndo_rx_flow_steer       = mlx5e_rx_flow_steer,
4889 #endif
4890 #ifdef CONFIG_MLX5_ESWITCH
4891         .ndo_bridge_setlink      = mlx5e_bridge_setlink,
4892         .ndo_bridge_getlink      = mlx5e_bridge_getlink,
4893
4894         /* SRIOV E-Switch NDOs */
4895         .ndo_set_vf_mac          = mlx5e_set_vf_mac,
4896         .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
4897         .ndo_set_vf_spoofchk     = mlx5e_set_vf_spoofchk,
4898         .ndo_set_vf_trust        = mlx5e_set_vf_trust,
4899         .ndo_set_vf_rate         = mlx5e_set_vf_rate,
4900         .ndo_get_vf_config       = mlx5e_get_vf_config,
4901         .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
4902         .ndo_get_vf_stats        = mlx5e_get_vf_stats,
4903         .ndo_has_offload_stats   = mlx5e_has_offload_stats,
4904         .ndo_get_offload_stats   = mlx5e_get_offload_stats,
4905 #endif
4906 };
4907
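/* Pick the smallest supported LRO timer period that is >= wanted_timeout,
 * falling back to the largest supported period.
 */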
4908 static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
4909 {
4910         int i;
4911
4912         /* The supported periods are organized in ascending order */
4913         for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++)
4914                 if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout)
4915                         break;
4916
4917         return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
4918 }
4919
4920 void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu)
4921 {
4922         struct mlx5e_params *params = &priv->channels.params;
4923         struct mlx5_core_dev *mdev = priv->mdev;
4924         u8 rx_cq_period_mode;
4925
4926         params->sw_mtu = mtu;
4927         params->hard_mtu = MLX5E_ETH_HARD_MTU;
4928         params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2,
4929                                      priv->max_nch);
4930         mlx5e_params_mqprio_reset(params);
4931
4932         /* SQ */
4933         params->log_sq_size = is_kdump_kernel() ?
4934                 MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
4935                 MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
4936         MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
4937
4938         /* XDP SQ */
4939         MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
4940
4941         /* set CQE compression */
4942         params->rx_cqe_compress_def = false;
4943         if (MLX5_CAP_GEN(mdev, cqe_compression) &&
4944             MLX5_CAP_GEN(mdev, vport_group_manager))
4945                 params->rx_cqe_compress_def = slow_pci_heuristic(mdev);
4946
4947         MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
4948         MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE, false);
4949
4950         /* RQ */
4951         mlx5e_build_rq_params(mdev, params);
4952
4953         params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT);
4954
4955         /* CQ moderation params */
4956         rx_cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
4957                         MLX5_CQ_PERIOD_MODE_START_FROM_CQE :
4958                         MLX5_CQ_PERIOD_MODE_START_FROM_EQE;
4959         params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
4960         params->tx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
4961         mlx5e_set_rx_cq_mode_params(params, rx_cq_period_mode);
4962         mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
4963
4964         /* TX inline */
4965         mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
4966
4967         params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
4968
4969         /* AF_XDP */
4970         params->xsk = xsk;
4971
4972         /* Do not update netdev->features directly here; mlx5e_update_features()
4973          * is called from mlx5e_attach_netdev().
4974          * To change netdev->features, modify mlx5e_fix_features() instead.
4975          */
4976 }
4977
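/* Use the MAC address queried from the device; assign a random one when the
 * query returns zeros and the device is not the vport group manager.
 */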
4978 static void mlx5e_set_netdev_dev_addr(struct net_device *netdev)
4979 {
4980         struct mlx5e_priv *priv = netdev_priv(netdev);
4981         u8 addr[ETH_ALEN];
4982
4983         mlx5_query_mac_address(priv->mdev, addr);
4984         if (is_zero_ether_addr(addr) &&
4985             !MLX5_CAP_GEN(priv->mdev, vport_group_manager)) {
4986                 eth_hw_addr_random(netdev);
4987                 mlx5_core_info(priv->mdev, "Assigned random MAC address %pM\n", netdev->dev_addr);
4988                 return;
4989         }
4990
4991         eth_hw_addr_set(netdev, addr);
4992 }
4993
4994 static int mlx5e_vxlan_set_port(struct net_device *netdev, unsigned int table,
4995                                 unsigned int entry, struct udp_tunnel_info *ti)
4996 {
4997         struct mlx5e_priv *priv = netdev_priv(netdev);
4998
4999         return mlx5_vxlan_add_port(priv->mdev->vxlan, ntohs(ti->port));
5000 }
5001
5002 static int mlx5e_vxlan_unset_port(struct net_device *netdev, unsigned int table,
5003                                   unsigned int entry, struct udp_tunnel_info *ti)
5004 {
5005         struct mlx5e_priv *priv = netdev_priv(netdev);
5006
5007         return mlx5_vxlan_del_port(priv->mdev->vxlan, ntohs(ti->port));
5008 }
5009
5010 void mlx5e_vxlan_set_netdev_info(struct mlx5e_priv *priv)
5011 {
5012         if (!mlx5_vxlan_allowed(priv->mdev->vxlan))
5013                 return;
5014
5015         priv->nic_info.set_port = mlx5e_vxlan_set_port;
5016         priv->nic_info.unset_port = mlx5e_vxlan_unset_port;
5017         priv->nic_info.flags = UDP_TUNNEL_NIC_INFO_MAY_SLEEP |
5018                                 UDP_TUNNEL_NIC_INFO_STATIC_IANA_VXLAN;
5019         priv->nic_info.tables[0].tunnel_types = UDP_TUNNEL_TYPE_VXLAN;
5020         /* Don't count the entry hard-coded for the IANA port */
5021         priv->nic_info.tables[0].n_entries =
5022                 mlx5_vxlan_max_udp_ports(priv->mdev) - 1;
5023
5024         priv->netdev->udp_tunnel_nic_info = &priv->nic_info;
5025 }
5026
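/* True if the device supports TX offload for any tunnel protocol (e.g. GRE or
 * IP-in-IP), or VXLAN/GENEVE offload is allowed.
 */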
5027 static bool mlx5e_tunnel_any_tx_proto_supported(struct mlx5_core_dev *mdev)
5028 {
5029         int tt;
5030
5031         for (tt = 0; tt < MLX5_NUM_TUNNEL_TT; tt++) {
5032                 if (mlx5e_tunnel_proto_supported_tx(mdev, mlx5_get_proto_by_tunnel_type(tt)))
5033                         return true;
5034         }
5035         return (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev));
5036 }
5037
5038 static void mlx5e_build_nic_netdev(struct net_device *netdev)
5039 {
5040         struct mlx5e_priv *priv = netdev_priv(netdev);
5041         struct mlx5_core_dev *mdev = priv->mdev;
5042         bool fcs_supported;
5043         bool fcs_enabled;
5044
5045         SET_NETDEV_DEV(netdev, mdev->device);
5046
5047         netdev->netdev_ops = &mlx5e_netdev_ops;
5048         netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops;
5049
5050         mlx5e_dcbnl_build_netdev(netdev);
5051
5052         netdev->watchdog_timeo    = 15 * HZ;
5053
5054         netdev->ethtool_ops       = &mlx5e_ethtool_ops;
5055
5056         netdev->vlan_features    |= NETIF_F_SG;
5057         netdev->vlan_features    |= NETIF_F_HW_CSUM;
5058         netdev->vlan_features    |= NETIF_F_GRO;
5059         netdev->vlan_features    |= NETIF_F_TSO;
5060         netdev->vlan_features    |= NETIF_F_TSO6;
5061         netdev->vlan_features    |= NETIF_F_RXCSUM;
5062         netdev->vlan_features    |= NETIF_F_RXHASH;
5063         netdev->vlan_features    |= NETIF_F_GSO_PARTIAL;
5064
5065         netdev->mpls_features    |= NETIF_F_SG;
5066         netdev->mpls_features    |= NETIF_F_HW_CSUM;
5067         netdev->mpls_features    |= NETIF_F_TSO;
5068         netdev->mpls_features    |= NETIF_F_TSO6;
5069
5070         netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_TX;
5071         netdev->hw_enc_features  |= NETIF_F_HW_VLAN_CTAG_RX;
5072
5073         /* Tunneled LRO is not supported in the driver, and the same RQs are
5074          * shared between inner and outer TIRs, so the driver can't disable LRO
5075          * for inner TIRs while having it enabled for outer TIRs. Due to this,
5076          * block LRO altogether if the firmware declares tunneled LRO support.
5077          */
5078         if (!!MLX5_CAP_ETH(mdev, lro_cap) &&
5079             !MLX5_CAP_ETH(mdev, tunnel_lro_vxlan) &&
5080             !MLX5_CAP_ETH(mdev, tunnel_lro_gre) &&
5081             mlx5e_check_fragmented_striding_rq_cap(mdev, PAGE_SHIFT,
5082                                                    MLX5E_MPWRQ_UMR_MODE_ALIGNED))
5083                 netdev->vlan_features    |= NETIF_F_LRO;
5084
5085         netdev->hw_features       = netdev->vlan_features;
5086         netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_TX;
5087         netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_RX;
5088         netdev->hw_features      |= NETIF_F_HW_VLAN_CTAG_FILTER;
5089         netdev->hw_features      |= NETIF_F_HW_VLAN_STAG_TX;
5090
5091         if (mlx5e_tunnel_any_tx_proto_supported(mdev)) {
5092                 netdev->hw_enc_features |= NETIF_F_HW_CSUM;
5093                 netdev->hw_enc_features |= NETIF_F_TSO;
5094                 netdev->hw_enc_features |= NETIF_F_TSO6;
5095                 netdev->hw_enc_features |= NETIF_F_GSO_PARTIAL;
5096         }
5097
5098         if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) {
5099                 netdev->hw_features     |= NETIF_F_GSO_UDP_TUNNEL |
5100                                            NETIF_F_GSO_UDP_TUNNEL_CSUM;
5101                 netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL |
5102                                            NETIF_F_GSO_UDP_TUNNEL_CSUM;
5103                 netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
5104                 netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL |
5105                                          NETIF_F_GSO_UDP_TUNNEL_CSUM;
5106         }
5107
5108         if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) {
5109                 netdev->hw_features     |= NETIF_F_GSO_GRE |
5110                                            NETIF_F_GSO_GRE_CSUM;
5111                 netdev->hw_enc_features |= NETIF_F_GSO_GRE |
5112                                            NETIF_F_GSO_GRE_CSUM;
5113                 netdev->gso_partial_features |= NETIF_F_GSO_GRE |
5114                                                 NETIF_F_GSO_GRE_CSUM;
5115         }
5116
5117         if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) {
5118                 netdev->hw_features |= NETIF_F_GSO_IPXIP4 |
5119                                        NETIF_F_GSO_IPXIP6;
5120                 netdev->hw_enc_features |= NETIF_F_GSO_IPXIP4 |
5121                                            NETIF_F_GSO_IPXIP6;
5122                 netdev->gso_partial_features |= NETIF_F_GSO_IPXIP4 |
5123                                                 NETIF_F_GSO_IPXIP6;
5124         }
5125
5126         netdev->gso_partial_features             |= NETIF_F_GSO_UDP_L4;
5127         netdev->hw_features                      |= NETIF_F_GSO_UDP_L4;
5128         netdev->features                         |= NETIF_F_GSO_UDP_L4;
5129
5130         mlx5_query_port_fcs(mdev, &fcs_supported, &fcs_enabled);
5131
5132         if (fcs_supported)
5133                 netdev->hw_features |= NETIF_F_RXALL;
5134
5135         if (MLX5_CAP_ETH(mdev, scatter_fcs))
5136                 netdev->hw_features |= NETIF_F_RXFCS;
5137
5138         if (mlx5_qos_is_supported(mdev))
5139                 netdev->hw_features |= NETIF_F_HW_TC;
5140
5141         netdev->features          = netdev->hw_features;
5142
5143         /* Defaults */
5144         if (fcs_enabled)
5145                 netdev->features  &= ~NETIF_F_RXALL;
5146         netdev->features  &= ~NETIF_F_LRO;
5147         netdev->features  &= ~NETIF_F_GRO_HW;
5148         netdev->features  &= ~NETIF_F_RXFCS;
5149
5150 #define FT_CAP(f) MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive.f)
5151         if (FT_CAP(flow_modify_en) &&
5152             FT_CAP(modify_root) &&
5153             FT_CAP(identified_miss_table_mode) &&
5154             FT_CAP(flow_table_modify)) {
5155 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
5156                 netdev->hw_features      |= NETIF_F_HW_TC;
5157 #endif
5158 #ifdef CONFIG_MLX5_EN_ARFS
5159                 netdev->hw_features      |= NETIF_F_NTUPLE;
5160 #endif
5161         }
5162
5163         netdev->features         |= NETIF_F_HIGHDMA;
5164         netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
5165
5166         netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
5167                                NETDEV_XDP_ACT_XSK_ZEROCOPY |
5168                                NETDEV_XDP_ACT_RX_SG;
5169
5170         netdev->priv_flags       |= IFF_UNICAST_FLT;
5171
5172         netif_set_tso_max_size(netdev, GSO_MAX_SIZE);
5173         mlx5e_set_netdev_dev_addr(netdev);
5174         mlx5e_macsec_build_netdev(priv);
5175         mlx5e_ipsec_build_netdev(priv);
5176         mlx5e_ktls_build_netdev(priv);
5177 }
5178
5179 void mlx5e_create_q_counters(struct mlx5e_priv *priv)
5180 {
5181         u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
5182         u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
5183         struct mlx5_core_dev *mdev = priv->mdev;
5184         int err;
5185
5186         MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
5187         err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
5188         if (!err)
5189                 priv->q_counter =
5190                         MLX5_GET(alloc_q_counter_out, out, counter_set_id);
5191
5192         err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
5193         if (!err)
5194                 priv->drop_rq_q_counter =
5195                         MLX5_GET(alloc_q_counter_out, out, counter_set_id);
5196 }
5197
5198 void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
5199 {
5200         u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
5201
5202         MLX5_SET(dealloc_q_counter_in, in, opcode,
5203                  MLX5_CMD_OP_DEALLOC_Q_COUNTER);
5204         if (priv->q_counter) {
5205                 MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
5206                          priv->q_counter);
5207                 mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
5208         }
5209
5210         if (priv->drop_rq_q_counter) {
5211                 MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
5212                          priv->drop_rq_q_counter);
5213                 mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
5214         }
5215 }
5216
5217 static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
5218                           struct net_device *netdev)
5219 {
5220         struct mlx5e_priv *priv = netdev_priv(netdev);
5221         struct mlx5e_flow_steering *fs;
5222         int err;
5223
5224         mlx5e_build_nic_params(priv, &priv->xsk, netdev->mtu);
5225         mlx5e_vxlan_set_netdev_info(priv);
5226
5227         mlx5e_timestamp_init(priv);
5228
5229         fs = mlx5e_fs_init(priv->profile, mdev,
5230                            !test_bit(MLX5E_STATE_DESTROYING, &priv->state),
5231                            priv->dfs_root);
5232         if (!fs) {
5233                 err = -ENOMEM;
5234                 mlx5_core_err(mdev, "FS initialization failed, %d\n", err);
5235                 return err;
5236         }
5237         priv->fs = fs;
5238
5239         err = mlx5e_ktls_init(priv);
5240         if (err)
5241                 mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
5242
5243         mlx5e_health_create_reporters(priv);
5244         return 0;
5245 }
5246
5247 static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
5248 {
5249         mlx5e_health_destroy_reporters(priv);
5250         mlx5e_ktls_cleanup(priv);
5251         mlx5e_fs_cleanup(priv->fs);
5252 }
5253
5254 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
5255 {
5256         struct mlx5_core_dev *mdev = priv->mdev;
5257         enum mlx5e_rx_res_features features;
5258         int err;
5259
5260         priv->rx_res = mlx5e_rx_res_alloc();
5261         if (!priv->rx_res)
5262                 return -ENOMEM;
5263
5264         mlx5e_create_q_counters(priv);
5265
5266         err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
5267         if (err) {
5268                 mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
5269                 goto err_destroy_q_counters;
5270         }
5271
5272         features = MLX5E_RX_RES_FEATURE_PTP;
5273         if (priv->channels.params.tunneled_offload_en)
5274                 features |= MLX5E_RX_RES_FEATURE_INNER_FT;
5275         err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
5276                                 priv->max_nch, priv->drop_rq.rqn,
5277                                 &priv->channels.params.packet_merge,
5278                                 priv->channels.params.num_channels);
5279         if (err)
5280                 goto err_close_drop_rq;
5281
5282         err = mlx5e_create_flow_steering(priv->fs, priv->rx_res, priv->profile,
5283                                          priv->netdev);
5284         if (err) {
5285                 mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
5286                 goto err_destroy_rx_res;
5287         }
5288
5289         err = mlx5e_tc_nic_init(priv);
5290         if (err)
5291                 goto err_destroy_flow_steering;
5292
5293         err = mlx5e_accel_init_rx(priv);
5294         if (err)
5295                 goto err_tc_nic_cleanup;
5296
5297 #ifdef CONFIG_MLX5_EN_ARFS
5298         priv->netdev->rx_cpu_rmap = mlx5_eq_table_get_rmap(priv->mdev);
5299 #endif
5300
5301         return 0;
5302
5303 err_tc_nic_cleanup:
5304         mlx5e_tc_nic_cleanup(priv);
5305 err_destroy_flow_steering:
5306         mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
5307                                     priv->profile);
5308 err_destroy_rx_res:
5309         mlx5e_rx_res_destroy(priv->rx_res);
5310 err_close_drop_rq:
5311         mlx5e_close_drop_rq(&priv->drop_rq);
5312 err_destroy_q_counters:
5313         mlx5e_destroy_q_counters(priv);
5314         mlx5e_rx_res_free(priv->rx_res);
5315         priv->rx_res = NULL;
5316         return err;
5317 }
5318
5319 static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
5320 {
5321         mlx5e_accel_cleanup_rx(priv);
5322         mlx5e_tc_nic_cleanup(priv);
5323         mlx5e_destroy_flow_steering(priv->fs, !!(priv->netdev->hw_features & NETIF_F_NTUPLE),
5324                                     priv->profile);
5325         mlx5e_rx_res_destroy(priv->rx_res);
5326         mlx5e_close_drop_rq(&priv->drop_rq);
5327         mlx5e_destroy_q_counters(priv);
5328         mlx5e_rx_res_free(priv->rx_res);
5329         priv->rx_res = NULL;
5330 }
5331
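/* Create the MQPRIO rate limiter when channel mode is configured and reflect
 * it in the channel parameters.
 */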
5332 static void mlx5e_set_mqprio_rl(struct mlx5e_priv *priv)
5333 {
5334         struct mlx5e_params *params;
5335         struct mlx5e_mqprio_rl *rl;
5336
5337         params = &priv->channels.params;
5338         if (params->mqprio.mode != TC_MQPRIO_MODE_CHANNEL)
5339                 return;
5340
5341         rl = mlx5e_mqprio_rl_create(priv->mdev, params->mqprio.num_tc,
5342                                     params->mqprio.channel.max_rate);
5343         if (IS_ERR(rl))
5344                 rl = NULL;
5345         priv->mqprio_rl = rl;
5346         mlx5e_mqprio_rl_update_params(params, rl);
5347 }
5348
5349 static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
5350 {
5351         int err;
5352
5353         err = mlx5e_create_tises(priv);
5354         if (err) {
5355                 mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err);
5356                 return err;
5357         }
5358
5359         err = mlx5e_accel_init_tx(priv);
5360         if (err)
5361                 goto err_destroy_tises;
5362
5363         mlx5e_set_mqprio_rl(priv);
5364         mlx5e_dcbnl_initialize(priv);
5365         return 0;
5366
5367 err_destroy_tises:
5368         mlx5e_destroy_tises(priv);
5369         return err;
5370 }
5371
5372 static void mlx5e_nic_enable(struct mlx5e_priv *priv)
5373 {
5374         struct net_device *netdev = priv->netdev;
5375         struct mlx5_core_dev *mdev = priv->mdev;
5376         int err;
5377
5378         mlx5e_fs_init_l2_addr(priv->fs, netdev);
5379         mlx5e_ipsec_init(priv);
5380
5381         err = mlx5e_macsec_init(priv);
5382         if (err)
5383                 mlx5_core_err(mdev, "MACsec initialization failed, %d\n", err);
5384
5385         /* Mark the link as currently not needed by the driver */
5386         if (!netif_running(netdev))
5387                 mlx5e_modify_admin_state(mdev, MLX5_PORT_DOWN);
5388
5389         mlx5e_set_netdev_mtu_boundaries(priv);
5390         mlx5e_set_dev_port_mtu(priv);
5391
5392         mlx5_lag_add_netdev(mdev, netdev);
5393
5394         mlx5e_enable_async_events(priv);
5395         mlx5e_enable_blocking_events(priv);
5396         if (mlx5e_monitor_counter_supported(priv))
5397                 mlx5e_monitor_counter_init(priv);
5398
5399         mlx5e_hv_vhca_stats_create(priv);
5400         if (netdev->reg_state != NETREG_REGISTERED)
5401                 return;
5402         mlx5e_dcbnl_init_app(priv);
5403
5404         mlx5e_nic_set_rx_mode(priv);
5405
5406         rtnl_lock();
5407         if (netif_running(netdev))
5408                 mlx5e_open(netdev);
5409         udp_tunnel_nic_reset_ntf(priv->netdev);
5410         netif_device_attach(netdev);
5411         rtnl_unlock();
5412 }
5413
5414 static void mlx5e_nic_disable(struct mlx5e_priv *priv)
5415 {
5416         struct mlx5_core_dev *mdev = priv->mdev;
5417
5418         if (priv->netdev->reg_state == NETREG_REGISTERED)
5419                 mlx5e_dcbnl_delete_app(priv);
5420
5421         rtnl_lock();
5422         if (netif_running(priv->netdev))
5423                 mlx5e_close(priv->netdev);
5424         netif_device_detach(priv->netdev);
5425         rtnl_unlock();
5426
5427         mlx5e_nic_set_rx_mode(priv);
5428
5429         mlx5e_hv_vhca_stats_destroy(priv);
5430         if (mlx5e_monitor_counter_supported(priv))
5431                 mlx5e_monitor_counter_cleanup(priv);
5432
5433         mlx5e_disable_blocking_events(priv);
5434         if (priv->en_trap) {
5435                 mlx5e_deactivate_trap(priv);
5436                 mlx5e_close_trap(priv->en_trap);
5437                 priv->en_trap = NULL;
5438         }
5439         mlx5e_disable_async_events(priv);
5440         mlx5_lag_remove_netdev(mdev, priv->netdev);
5441         mlx5_vxlan_reset_to_default(mdev->vxlan);
5442         mlx5e_macsec_cleanup(priv);
5443         mlx5e_ipsec_cleanup(priv);
5444 }
5445
5446 int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
5447 {
5448         return mlx5e_refresh_tirs(priv, false, false);
5449 }
5450
5451 static const struct mlx5e_profile mlx5e_nic_profile = {
5452         .init              = mlx5e_nic_init,
5453         .cleanup           = mlx5e_nic_cleanup,
5454         .init_rx           = mlx5e_init_nic_rx,
5455         .cleanup_rx        = mlx5e_cleanup_nic_rx,
5456         .init_tx           = mlx5e_init_nic_tx,
5457         .cleanup_tx        = mlx5e_cleanup_nic_tx,
5458         .enable            = mlx5e_nic_enable,
5459         .disable           = mlx5e_nic_disable,
5460         .update_rx         = mlx5e_update_nic_rx,
5461         .update_stats      = mlx5e_stats_update_ndo_stats,
5462         .update_carrier    = mlx5e_update_carrier,
5463         .rx_handlers       = &mlx5e_rx_handlers_nic,
5464         .max_tc            = MLX5E_MAX_NUM_TC,
5465         .stats_grps        = mlx5e_nic_stats_grps,
5466         .stats_grps_num    = mlx5e_nic_stats_grps_num,
5467         .features          = BIT(MLX5E_PROFILE_FEATURE_PTP_RX) |
5468                 BIT(MLX5E_PROFILE_FEATURE_PTP_TX) |
5469                 BIT(MLX5E_PROFILE_FEATURE_QOS_HTB) |
5470                 BIT(MLX5E_PROFILE_FEATURE_FS_VLAN) |
5471                 BIT(MLX5E_PROFILE_FEATURE_FS_TC),
5472 };
5473
5474 static int mlx5e_profile_max_num_channels(struct mlx5_core_dev *mdev,
5475                                           const struct mlx5e_profile *profile)
5476 {
5477         int nch;
5478
5479         nch = mlx5e_get_max_num_channels(mdev);
5480
5481         if (profile->max_nch_limit)
5482                 nch = min_t(int, nch, profile->max_nch_limit(mdev));
5483         return nch;
5484 }
5485
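/* Bound the number of channels by core resources and by the netdev RX/TX
 * queue counts, reserving TX queues for QoS and PTP where supported.
 */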
5486 static unsigned int
5487 mlx5e_calc_max_nch(struct mlx5_core_dev *mdev, struct net_device *netdev,
5488                    const struct mlx5e_profile *profile)
5489
5490 {
5491         unsigned int max_nch, tmp;
5492
5493         /* core resources */
5494         max_nch = mlx5e_profile_max_num_channels(mdev, profile);
5495
5496         /* netdev rx queues */
5497         max_nch = min_t(unsigned int, max_nch, netdev->num_rx_queues);
5498
5499         /* netdev tx queues */
5500         tmp = netdev->num_tx_queues;
5501         if (mlx5_qos_is_supported(mdev))
5502                 tmp -= mlx5e_qos_max_leaf_nodes(mdev);
5503         if (MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn))
5504                 tmp -= profile->max_tc;
5505         tmp = tmp / profile->max_tc;
5506         max_nch = min_t(unsigned int, max_nch, tmp);
5507
5508         return max_nch;
5509 }
5510
5511 int mlx5e_get_pf_num_tirs(struct mlx5_core_dev *mdev)
5512 {
5513         /* Indirect TIRs: 2 sets of TTCs (inner + outer steering),
5514          * plus 1 set of direct TIRs (one per channel).
5515          */
5516         return 2 * MLX5E_NUM_INDIR_TIRS
5517                 + mlx5e_profile_max_num_channels(mdev, &mlx5e_nic_profile);
5518 }
5519
5520 void mlx5e_set_rx_mode_work(struct work_struct *work)
5521 {
5522         struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
5523                                                set_rx_mode_work);
5524
5525         return mlx5e_fs_set_rx_mode_work(priv->fs, priv->netdev);
5526 }
5527
5528 /* mlx5e generic netdev management API (move to en_common.c) */
5529 int mlx5e_priv_init(struct mlx5e_priv *priv,
5530                     const struct mlx5e_profile *profile,
5531                     struct net_device *netdev,
5532                     struct mlx5_core_dev *mdev)
5533 {
5534         int nch, num_txqs, node;
5535         int err;
5536
5537         num_txqs = netdev->num_tx_queues;
5538         nch = mlx5e_calc_max_nch(mdev, netdev, profile);
5539         node = dev_to_node(mlx5_core_dma_dev(mdev));
5540
5541         /* priv init */
5542         priv->mdev        = mdev;
5543         priv->netdev      = netdev;
5544         priv->msglevel    = MLX5E_MSG_LEVEL;
5545         priv->max_nch     = nch;
5546         priv->max_opened_tc = 1;
5547
5548         if (!alloc_cpumask_var(&priv->scratchpad.cpumask, GFP_KERNEL))
5549                 return -ENOMEM;
5550
5551         mutex_init(&priv->state_lock);
5552
5553         err = mlx5e_selq_init(&priv->selq, &priv->state_lock);
5554         if (err)
5555                 goto err_free_cpumask;
5556
5557         INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
5558         INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
5559         INIT_WORK(&priv->tx_timeout_work, mlx5e_tx_timeout_work);
5560         INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
5561
5562         priv->wq = create_singlethread_workqueue("mlx5e");
5563         if (!priv->wq)
5564                 goto err_free_selq;
5565
5566         priv->txq2sq = kcalloc_node(num_txqs, sizeof(*priv->txq2sq), GFP_KERNEL, node);
5567         if (!priv->txq2sq)
5568                 goto err_destroy_workqueue;
5569
5570         priv->tx_rates = kcalloc_node(num_txqs, sizeof(*priv->tx_rates), GFP_KERNEL, node);
5571         if (!priv->tx_rates)
5572                 goto err_free_txq2sq;
5573
5574         priv->channel_stats =
5575                 kcalloc_node(nch, sizeof(*priv->channel_stats), GFP_KERNEL, node);
5576         if (!priv->channel_stats)
5577                 goto err_free_tx_rates;
5578
5579         return 0;
5580
5581 err_free_tx_rates:
5582         kfree(priv->tx_rates);
5583 err_free_txq2sq:
5584         kfree(priv->txq2sq);
5585 err_destroy_workqueue:
5586         destroy_workqueue(priv->wq);
5587 err_free_selq:
5588         mlx5e_selq_cleanup(&priv->selq);
5589 err_free_cpumask:
5590         free_cpumask_var(priv->scratchpad.cpumask);
5591         return -ENOMEM;
5592 }
5593
5594 void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
5595 {
5596         int i;
5597
5598         /* Bail if the profile change failed and the rollback failed as well */
5599         if (!priv->mdev)
5600                 return;
5601
5602         for (i = 0; i < priv->stats_nch; i++)
5603                 kvfree(priv->channel_stats[i]);
5604         kfree(priv->channel_stats);
5605         kfree(priv->tx_rates);
5606         kfree(priv->txq2sq);
5607         destroy_workqueue(priv->wq);
5608         mutex_lock(&priv->state_lock);
5609         mlx5e_selq_cleanup(&priv->selq);
5610         mutex_unlock(&priv->state_lock);
5611         free_cpumask_var(priv->scratchpad.cpumask);
5612
5613         for (i = 0; i < priv->htb_max_qos_sqs; i++)
5614                 kfree(priv->htb_qos_sq_stats[i]);
5615         kvfree(priv->htb_qos_sq_stats);
5616
5617         memset(priv, 0, sizeof(*priv));
5618 }
5619
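/* Max TX queues: one per channel per TC, plus PTP TCs and HTB QoS leaves when
 * supported by the device and the profile.
 */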
5620 static unsigned int mlx5e_get_max_num_txqs(struct mlx5_core_dev *mdev,
5621                                            const struct mlx5e_profile *profile)
5622 {
5623         unsigned int nch, ptp_txqs, qos_txqs;
5624
5625         nch = mlx5e_profile_max_num_channels(mdev, profile);
5626
5627         ptp_txqs = MLX5_CAP_GEN(mdev, ts_cqe_to_dest_cqn) &&
5628                 mlx5e_profile_feature_cap(profile, PTP_TX) ?
5629                 profile->max_tc : 0;
5630
5631         qos_txqs = mlx5_qos_is_supported(mdev) &&
5632                 mlx5e_profile_feature_cap(profile, QOS_HTB) ?
5633                 mlx5e_qos_max_leaf_nodes(mdev) : 0;
5634
5635         return nch * profile->max_tc + ptp_txqs + qos_txqs;
5636 }
5637
5638 static unsigned int mlx5e_get_max_num_rxqs(struct mlx5_core_dev *mdev,
5639                                            const struct mlx5e_profile *profile)
5640 {
5641         return mlx5e_profile_max_num_channels(mdev, profile);
5642 }
5643
5644 struct net_device *
5645 mlx5e_create_netdev(struct mlx5_core_dev *mdev, const struct mlx5e_profile *profile)
5646 {
5647         struct net_device *netdev;
5648         unsigned int txqs, rxqs;
5649         int err;
5650
5651         txqs = mlx5e_get_max_num_txqs(mdev, profile);
5652         rxqs = mlx5e_get_max_num_rxqs(mdev, profile);
5653
5654         netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), txqs, rxqs);
5655         if (!netdev) {
5656                 mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n");
5657                 return NULL;
5658         }
5659
5660         err = mlx5e_priv_init(netdev_priv(netdev), profile, netdev, mdev);
5661         if (err) {
5662                 mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
5663                 goto err_free_netdev;
5664         }
5665
5666         netif_carrier_off(netdev);
5667         netif_tx_disable(netdev);
5668         dev_net_set(netdev, mlx5_core_net(mdev));
5669
5670         return netdev;
5671
5672 err_free_netdev:
5673         free_netdev(netdev);
5674
5675         return NULL;
5676 }
5677
5678 static void mlx5e_update_features(struct net_device *netdev)
5679 {
5680         if (netdev->reg_state != NETREG_REGISTERED)
5681                 return; /* features will be updated on netdev registration */
5682
5683         rtnl_lock();
5684         netdev_update_features(netdev);
5685         rtnl_unlock();
5686 }
5687
5688 static void mlx5e_reset_channels(struct net_device *netdev)
5689 {
5690         netdev_reset_tc(netdev);
5691 }
5692
5693 int mlx5e_attach_netdev(struct mlx5e_priv *priv)
5694 {
5695         const bool take_rtnl = priv->netdev->reg_state == NETREG_REGISTERED;
5696         const struct mlx5e_profile *profile = priv->profile;
5697         int max_nch;
5698         int err;
5699
5700         clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
5701         if (priv->fs)
5702                 mlx5e_fs_set_state_destroy(priv->fs,
5703                                            !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
5704
5705         /* Validate the max_wqe_size_sq capability. */
5706         if (WARN_ON_ONCE(mlx5e_get_max_sq_wqebbs(priv->mdev) < MLX5E_MAX_TX_WQEBBS)) {
5707                 mlx5_core_warn(priv->mdev, "MLX5E: Max SQ WQEBBs firmware capability: %u, needed %lu\n",
5708                                mlx5e_get_max_sq_wqebbs(priv->mdev), MLX5E_MAX_TX_WQEBBS);
5709                 return -EIO;
5710         }
5711
5712         /* max number of channels may have changed */
5713         max_nch = mlx5e_calc_max_nch(priv->mdev, priv->netdev, profile);
5714         if (priv->channels.params.num_channels > max_nch) {
5715                 mlx5_core_warn(priv->mdev, "MLX5E: Reducing number of channels to %d\n", max_nch);
5716                 /* Reducing the number of channels - RXFH has to be reset, and
5717                  * mlx5e_num_channels_changed below will build the RQT.
5718                  */
5719                 priv->netdev->priv_flags &= ~IFF_RXFH_CONFIGURED;
5720                 priv->channels.params.num_channels = max_nch;
5721                 if (priv->channels.params.mqprio.mode == TC_MQPRIO_MODE_CHANNEL) {
5722                         mlx5_core_warn(priv->mdev, "MLX5E: Disabling MQPRIO channel mode\n");
5723                         mlx5e_params_mqprio_reset(&priv->channels.params);
5724                 }
5725         }
5726         if (max_nch != priv->max_nch) {
5727                 mlx5_core_warn(priv->mdev,
5728                                "MLX5E: Updating max number of channels from %u to %u\n",
5729                                priv->max_nch, max_nch);
5730                 priv->max_nch = max_nch;
5731         }
5732
5733         /* 1. Set the real number of queues in the kernel the first time.
5734          * 2. Set our default XPS cpumask.
5735          * 3. Build the RQT.
5736          *
5737          * rtnl_lock is required by netif_set_real_num_*_queues in case the
5738          * netdev has been registered by this point (if this function was called
5739          * in the reload or resume flow).
5740          */
5741         if (take_rtnl)
5742                 rtnl_lock();
5743         err = mlx5e_num_channels_changed(priv);
5744         if (take_rtnl)
5745                 rtnl_unlock();
5746         if (err)
5747                 goto out;
5748
5749         err = profile->init_tx(priv);
5750         if (err)
5751                 goto out;
5752
5753         err = profile->init_rx(priv);
5754         if (err)
5755                 goto err_cleanup_tx;
5756
5757         if (profile->enable)
5758                 profile->enable(priv);
5759
5760         mlx5e_update_features(priv->netdev);
5761
5762         return 0;
5763
5764 err_cleanup_tx:
5765         profile->cleanup_tx(priv);
5766
5767 out:
5768         mlx5e_reset_channels(priv->netdev);
5769         set_bit(MLX5E_STATE_DESTROYING, &priv->state);
5770         if (priv->fs)
5771                 mlx5e_fs_set_state_destroy(priv->fs,
5772                                            !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
5773         cancel_work_sync(&priv->update_stats_work);
5774         return err;
5775 }
5776
5777 void mlx5e_detach_netdev(struct mlx5e_priv *priv)
5778 {
5779         const struct mlx5e_profile *profile = priv->profile;
5780
5781         set_bit(MLX5E_STATE_DESTROYING, &priv->state);
5782         if (priv->fs)
5783                 mlx5e_fs_set_state_destroy(priv->fs,
5784                                            !test_bit(MLX5E_STATE_DESTROYING, &priv->state));
5785
5786         if (profile->disable)
5787                 profile->disable(priv);
5788         flush_workqueue(priv->wq);
5789
5790         profile->cleanup_rx(priv);
5791         profile->cleanup_tx(priv);
5792         mlx5e_reset_channels(priv->netdev);
5793         cancel_work_sync(&priv->update_stats_work);
5794 }
5795
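/* Initialize priv for the new profile, run its init callback and attach the
 * netdev, unwinding on failure.
 */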
5796 static int
5797 mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
5798                             const struct mlx5e_profile *new_profile, void *new_ppriv)
5799 {
5800         struct mlx5e_priv *priv = netdev_priv(netdev);
5801         int err;
5802
5803         err = mlx5e_priv_init(priv, new_profile, netdev, mdev);
5804         if (err) {
5805                 mlx5_core_err(mdev, "mlx5e_priv_init failed, err=%d\n", err);
5806                 return err;
5807         }
5808         netif_carrier_off(netdev);
5809         priv->profile = new_profile;
5810         priv->ppriv = new_ppriv;
5811         err = new_profile->init(priv->mdev, priv->netdev);
5812         if (err)
5813                 goto priv_cleanup;
5814         err = mlx5e_attach_netdev(priv);
5815         if (err)
5816                 goto profile_cleanup;
5817         return err;
5818
5819 profile_cleanup:
5820         new_profile->cleanup(priv);
5821 priv_cleanup:
5822         mlx5e_priv_cleanup(priv);
5823         return err;
5824 }
5825
5826 int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
5827                                 const struct mlx5e_profile *new_profile, void *new_ppriv)
5828 {
5829         const struct mlx5e_profile *orig_profile = priv->profile;
5830         struct net_device *netdev = priv->netdev;
5831         struct mlx5_core_dev *mdev = priv->mdev;
5832         void *orig_ppriv = priv->ppriv;
5833         int err, rollback_err;
5834
5835         /* cleanup old profile */
5836         mlx5e_detach_netdev(priv);
5837         priv->profile->cleanup(priv);
5838         mlx5e_priv_cleanup(priv);
5839
5840         err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv);
5841         if (err) { /* roll back to original profile */
5842                 netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err);
5843                 goto rollback;
5844         }
5845
5846         return 0;
5847
5848 rollback:
5849         rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv);
5850         if (rollback_err)
5851                 netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n",
5852                            __func__, rollback_err);
5853         return err;
5854 }
5855
5856 void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv)
5857 {
5858         mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL);
5859 }
5860
5861 void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
5862 {
5863         struct net_device *netdev = priv->netdev;
5864
5865         mlx5e_priv_cleanup(priv);
5866         free_netdev(netdev);
5867 }
5868
5869 static int mlx5e_resume(struct auxiliary_device *adev)
5870 {
5871         struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
5872         struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev);
5873         struct mlx5e_priv *priv = mlx5e_dev->priv;
5874         struct net_device *netdev = priv->netdev;
5875         struct mlx5_core_dev *mdev = edev->mdev;
5876         int err;
5877
5878         if (netif_device_present(netdev))
5879                 return 0;
5880
5881         err = mlx5e_create_mdev_resources(mdev);
5882         if (err)
5883                 return err;
5884
5885         err = mlx5e_attach_netdev(priv);
5886         if (err) {
5887                 mlx5e_destroy_mdev_resources(mdev);
5888                 return err;
5889         }
5890
5891         return 0;
5892 }
5893
5894 static int mlx5e_suspend(struct auxiliary_device *adev, pm_message_t state)
5895 {
5896         struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev);
5897         struct mlx5e_priv *priv = mlx5e_dev->priv;
5898         struct net_device *netdev = priv->netdev;
5899         struct mlx5_core_dev *mdev = priv->mdev;
5900
5901         if (!netif_device_present(netdev))
5902                 return -ENODEV;
5903
5904         mlx5e_detach_netdev(priv);
5905         mlx5e_destroy_mdev_resources(mdev);
5906         return 0;
5907 }
5908
5909 static int mlx5e_probe(struct auxiliary_device *adev,
5910                        const struct auxiliary_device_id *id)
5911 {
5912         struct mlx5_adev *edev = container_of(adev, struct mlx5_adev, adev);
5913         const struct mlx5e_profile *profile = &mlx5e_nic_profile;
5914         struct mlx5_core_dev *mdev = edev->mdev;
5915         struct mlx5e_dev *mlx5e_dev;
5916         struct net_device *netdev;
5917         pm_message_t state = {};
5918         struct mlx5e_priv *priv;
5919         int err;
5920
5921         mlx5e_dev = mlx5e_create_devlink(&adev->dev, mdev);
5922         if (IS_ERR(mlx5e_dev))
5923                 return PTR_ERR(mlx5e_dev);
5924         auxiliary_set_drvdata(adev, mlx5e_dev);
5925
5926         err = mlx5e_devlink_port_register(mlx5e_dev, mdev);
5927         if (err) {
5928                 mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
5929                 goto err_devlink_unregister;
5930         }
5931
5932         netdev = mlx5e_create_netdev(mdev, profile);
5933         if (!netdev) {
5934                 mlx5_core_err(mdev, "mlx5e_create_netdev failed\n");
5935                 err = -ENOMEM;
5936                 goto err_devlink_port_unregister;
5937         }
5938         SET_NETDEV_DEVLINK_PORT(netdev, &mlx5e_dev->dl_port);
5939
5940         mlx5e_build_nic_netdev(netdev);
5941
5942         priv = netdev_priv(netdev);
5943         mlx5e_dev->priv = priv;
5944
5945         priv->profile = profile;
5946         priv->ppriv = NULL;
5947
5948         priv->dfs_root = debugfs_create_dir("nic",
5949                                             mlx5_debugfs_get_dev_root(priv->mdev));
5950
5951         err = profile->init(mdev, netdev);
5952         if (err) {
5953                 mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err);
5954                 goto err_destroy_netdev;
5955         }
5956
5957         err = mlx5e_resume(adev);
5958         if (err) {
5959                 mlx5_core_err(mdev, "mlx5e_resume failed, %d\n", err);
5960                 goto err_profile_cleanup;
5961         }
5962
5963         err = register_netdev(netdev);
5964         if (err) {
5965                 mlx5_core_err(mdev, "register_netdev failed, %d\n", err);
5966                 goto err_resume;
5967         }
5968
5969         mlx5e_dcbnl_init_app(priv);
5970         mlx5_core_uplink_netdev_set(mdev, netdev);
5971         mlx5e_params_print_info(mdev, &priv->channels.params);
5972         return 0;
5973
5974 err_resume:
5975         mlx5e_suspend(adev, state);
5976 err_profile_cleanup:
5977         profile->cleanup(priv);
5978 err_destroy_netdev:
5979         debugfs_remove_recursive(priv->dfs_root);
5980         mlx5e_destroy_netdev(priv);
5981 err_devlink_port_unregister:
5982         mlx5e_devlink_port_unregister(mlx5e_dev);
5983 err_devlink_unregister:
5984         mlx5e_destroy_devlink(mlx5e_dev);
5985         return err;
5986 }
5987
5988 static void mlx5e_remove(struct auxiliary_device *adev)
5989 {
5990         struct mlx5e_dev *mlx5e_dev = auxiliary_get_drvdata(adev);
5991         struct mlx5e_priv *priv = mlx5e_dev->priv;
5992         pm_message_t state = {};
5993
5994         mlx5_core_uplink_netdev_set(priv->mdev, NULL);
5995         mlx5e_dcbnl_delete_app(priv);
5996         unregister_netdev(priv->netdev);
5997         mlx5e_suspend(adev, state);
5998         priv->profile->cleanup(priv);
5999         debugfs_remove_recursive(priv->dfs_root);
6000         mlx5e_destroy_netdev(priv);
6001         mlx5e_devlink_port_unregister(mlx5e_dev);
6002         mlx5e_destroy_devlink(mlx5e_dev);
6003 }
6004
6005 static const struct auxiliary_device_id mlx5e_id_table[] = {
6006         { .name = MLX5_ADEV_NAME ".eth", },
6007         {},
6008 };
6009
6010 MODULE_DEVICE_TABLE(auxiliary, mlx5e_id_table);
6011
6012 static struct auxiliary_driver mlx5e_driver = {
6013         .name = "eth",
6014         .probe = mlx5e_probe,
6015         .remove = mlx5e_remove,
6016         .suspend = mlx5e_suspend,
6017         .resume = mlx5e_resume,
6018         .id_table = mlx5e_id_table,
6019 };
6020
6021 int mlx5e_init(void)
6022 {
6023         int ret;
6024
6025         mlx5e_build_ptys2ethtool_map();
6026         ret = auxiliary_driver_register(&mlx5e_driver);
6027         if (ret)
6028                 return ret;
6029
6030         ret = mlx5e_rep_init();
6031         if (ret)
6032                 auxiliary_driver_unregister(&mlx5e_driver);
6033         return ret;
6034 }
6035
6036 void mlx5e_cleanup(void)
6037 {
6038         mlx5e_rep_cleanup();
6039         auxiliary_driver_unregister(&mlx5e_driver);
6040 }