// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>
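
/* RX data path for AF_XDP (XSK) zero-copy receive queues. */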
static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp)
{
	/* mlx5e_xdp_buff shares its layout with xdp_buff_xsk
	 * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb
	 */
	return (struct mlx5e_xdp_buff *)xdp;
}
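
/* Allocate a batch of XSK frames for one multi-packet WQE and post the UMR
 * WQE that maps them on the ICOSQ. Returns 0 on success or a negative errno
 * if not enough frames could be allocated.
 */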
int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
	struct mlx5e_icosq *icosq = rq->icosq;
	struct mlx5_wq_cyc *wq = &icosq->wq;
	struct mlx5e_umr_wqe *umr_wqe;
	struct xdp_buff **xsk_buffs;
	int batch, i;
	u32 offset; /* 17-bit value with MTT. */
	u16 pi;

	if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
		goto err;

	XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
	xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs;
	batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs,
				     rq->mpwqe.pages_per_wqe);

	/* If batch < pages_per_wqe, either:
	 * 1. Some (or all) descriptors were invalid.
	 * 2. dma_need_sync is true, and it fell back to allocating one frame.
	 * In either case, try to continue allocating frames one by one, until
	 * the first error, which will mean there are no more valid descriptors.
	 */
	for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
		xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!xsk_buffs[batch]))
			goto err_reuse_batch;
	}

	pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));
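
	/* Fill the UMR translation entries, one set per XSK frame, according
	 * to the UMR mode configured for this RQ.
	 */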
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
				.ptag = cpu_to_be64(addr | MLX5_EN_WR),
			};
			mxbuf->rq = rq;
		}
	} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			mxbuf->rq = rq;
		}
	} else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
		u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);

		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
			};
			umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size),
			};
			umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr + mapping_size * 2),
			};
			umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
			};
			mxbuf->rq = rq;
		}
	} else {
		__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
					      rq->xsk_pool->chunk_size);
		__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);

		for (i = 0; i < batch; i++) {
			struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
			dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

			umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(addr),
				.bcount = frame_size,
			};
			umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
				.key = rq->mkey_be,
				.va = cpu_to_be64(rq->wqe_overflow.addr),
				.bcount = pad_size,
			};
			mxbuf->rq = rq;
		}
	}

	bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
	wi->consumed_strides = 0;

	umr_wqe->ctrl.opmod_idx_opcode =
		cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);

	/* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
	offset = ix * rq->mpwqe.mtts_per_wqe;
	if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
		offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
		offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
	else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
		offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
	umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

	icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
		.wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
		.num_wqebbs = rq->mpwqe.umr_wqebbs,
		.umr.rq = rq,
	};

	icosq->pc += rq->mpwqe.umr_wqebbs;

	icosq->doorbell_cseg = &umr_wqe->ctrl;

	return 0;

err_reuse_batch:
	while (--batch >= 0)
		xsk_buff_free(xsk_buffs[batch]);

err:
	rq->stats->buff_alloc_err++;
	return -ENOMEM;
}
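
/* Bulk-allocate XSK frames for wqe_bulk legacy RQ WQEs. The allocation is
 * done in at most two batches to handle the wrap-around of the cyclic WQ.
 * Returns the number of WQEs that were actually filled.
 */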
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	struct xdp_buff **buffs;
	u32 contig, alloc;
	int i;

	/* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the
	 * rq->wqe.alloc_units->xsk_buffs array allocated here.
	 */
	buffs = rq->wqe.alloc_units->xsk_buffs;
	contig = mlx5_wq_cyc_get_size(wq) - ix;
	if (wqe_bulk <= contig) {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
	} else {
		alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
		if (likely(alloc == contig))
			alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
	}

	for (i = 0; i < alloc; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return alloc;
}
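
/* Non-batched counterpart of mlx5e_xsk_alloc_rx_wqes_batched: allocate XSK
 * frames one by one, stopping at the first allocation failure. Returns the
 * number of WQEs filled.
 */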
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
	struct mlx5_wq_cyc *wq = &rq->wqe.wq;
	int i;

	for (i = 0; i < wqe_bulk; i++) {
		int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
		struct mlx5e_wqe_frag_info *frag;
		struct mlx5e_rx_wqe_cyc *wqe;
		dma_addr_t addr;

		wqe = mlx5_wq_cyc_get_wqe(wq, j);
		/* Assumes log_num_frags == 0. */
		frag = &rq->wqe.frags[j];

		*frag->xskp = xsk_buff_alloc(rq->xsk_pool);
		if (unlikely(!*frag->xskp))
			return i;

		addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
		wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
		frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
	}

	return wqe_bulk;
}
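
/* Copy the frame (including XDP metadata, if any) from the UMEM into a newly
 * allocated SKB, so that the XSK frame can be released and reused.
 */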
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
	u32 totallen = xdp->data_end - xdp->data_meta;
	u32 metalen = xdp->data - xdp->data_meta;
	struct sk_buff *skb;

	skb = napi_alloc_skb(rq->cq.napi, totallen);
	if (unlikely(!skb)) {
		rq->stats->buff_alloc_err++;
		return NULL;
	}

	skb_put_data(skb, xdp->data_meta, totallen);

	if (metalen) {
		skb_metadata_set(skb, metalen);
		__skb_pull(skb, metalen);
	}

	return skb;
}
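
/* Handle a linear packet received on a striding (MPWQE) RQ in XSK mode: run
 * the XDP program on the frame and, on XDP_PASS, build an SKB copy of it.
 */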
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
						    struct mlx5e_mpw_info *wi,
						    struct mlx5_cqe64 *cqe,
						    u16 cqe_bcnt,
						    u32 head_offset,
						    u32 page_idx)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]);
	struct bpf_prog *prog;

	/* Check packet size. Note LRO doesn't use linear SKB */
	if (unlikely(cqe_bcnt > rq->hw_mtu)) {
		rq->stats->oversize_pkts_sw_drop++;
		return NULL;
	}

	/* head_offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, in
	 * the current implementation, UMR pages are mapped to XSK frames, so
	 * head_offset should always be 0.
	 */
	WARN_ON_ONCE(head_offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp);
	net_prefetch(mxbuf->xdp.data);

	/* Possible flows:
	 * - XDP_REDIRECT to XSKMAP:
	 *   The page is owned by the userspace from now.
	 * - XDP_TX and other XDP_REDIRECTs:
	 *   The page was returned by ZCA and recycled.
	 * - XDP_DROP:
	 *   Recycle the page.
	 * - XDP_PASS:
	 *   Allocate an SKB, copy the data and recycle the page.
	 *
	 * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
	 * size is the same as the Driver RX Ring's size, and pages for WQEs are
	 * allocated first from the Reuse Ring, so it has enough space.
	 */

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			__set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
	 * frame. On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}
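
/* Same as above, but for a linear packet received on a legacy (cyclic) RQ in
 * XSK mode.
 */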
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
					      struct mlx5e_wqe_frag_info *wi,
					      struct mlx5_cqe64 *cqe,
					      u32 cqe_bcnt)
{
	struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp);
	struct bpf_prog *prog;

	/* wi->offset is not used in this function, because xdp->data and the
	 * DMA address point directly to the necessary place. Furthermore, the
	 * XSK allocator allocates frames per packet, instead of pages, so
	 * wi->offset should always be 0.
	 */
	WARN_ON_ONCE(wi->offset);

	/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
	mxbuf->cqe = cqe;
	xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
	xsk_buff_dma_sync_for_cpu(&mxbuf->xdp);
	net_prefetch(mxbuf->xdp.data);

	prog = rcu_dereference(rq->xdp_prog);
	if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
		if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
			wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
		return NULL; /* page/packet was consumed by XDP */
	}

	/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
	 * will be handled by mlx5e_free_rx_wqe.
	 * On SKB allocation failure, NULL is returned.
	 */
	return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}