drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include "rx.h"
#include "en/xdp.h"
#include <net/xdp_sock_drv.h>
#include <linux/filter.h>

/* RX data path */

static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp)
{
        /* mlx5e_xdp_buff shares its layout with xdp_buff_xsk
         * and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb
         */
        return (struct mlx5e_xdp_buff *)xdp;
}

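/* Allocate pages_per_wqe XSK frames for the MPWQE at index ix and post a UMR
 * WQE on the ICOSQ that maps them according to the RQ's UMR mode. Returns 0 on
 * success or -ENOMEM if the frames cannot be allocated.
 */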
int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
{
        struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
        struct mlx5e_icosq *icosq = rq->icosq;
        struct mlx5_wq_cyc *wq = &icosq->wq;
        struct mlx5e_umr_wqe *umr_wqe;
        struct xdp_buff **xsk_buffs;
        int batch, i;
        u32 offset; /* 17-bit value with MTT. */
        u16 pi;

        if (unlikely(!xsk_buff_can_alloc(rq->xsk_pool, rq->mpwqe.pages_per_wqe)))
                goto err;

        XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
        xsk_buffs = (struct xdp_buff **)wi->alloc_units.xsk_buffs;
        batch = xsk_buff_alloc_batch(rq->xsk_pool, xsk_buffs,
                                     rq->mpwqe.pages_per_wqe);

        /* If batch < pages_per_wqe, either:
         * 1. Some (or all) descriptors were invalid.
         * 2. dma_need_sync is true, and it fell back to allocating one frame.
         * In either case, try to continue allocating frames one by one, until
         * the first error, which will mean there are no more valid descriptors.
         */
        for (; batch < rq->mpwqe.pages_per_wqe; batch++) {
                xsk_buffs[batch] = xsk_buff_alloc(rq->xsk_pool);
                if (unlikely(!xsk_buffs[batch]))
                        goto err_reuse_batch;
        }

        pi = mlx5e_icosq_get_next_pi(icosq, rq->mpwqe.umr_wqebbs);
        umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
        memcpy(umr_wqe, &rq->mpwqe.umr_wqe, sizeof(struct mlx5e_umr_wqe));

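        /* Fill the UMR translation entries for the allocated frames according
         * to the UMR mode: MTTs for aligned frames, one KSM per frame for
         * unaligned frames, four KSMs per frame in triple mode (the fourth
         * pointing at the overflow page), or KLM pairs (frame plus padding
         * mapped to the overflow page) in the oversized case.
         */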
        if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
                for (i = 0; i < batch; i++) {
                        struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
                        dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

                        umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
                                .ptag = cpu_to_be64(addr | MLX5_EN_WR),
                        };
                        mxbuf->rq = rq;
                }
        } else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
                for (i = 0; i < batch; i++) {
                        struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
                        dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

                        umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
                                .key = rq->mkey_be,
                                .va = cpu_to_be64(addr),
                        };
                        mxbuf->rq = rq;
                }
        } else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
                u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);

                for (i = 0; i < batch; i++) {
                        struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
                        dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

                        umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
                                .key = rq->mkey_be,
                                .va = cpu_to_be64(addr),
                        };
                        umr_wqe->inline_ksms[(i << 2) + 1] = (struct mlx5_ksm) {
                                .key = rq->mkey_be,
                                .va = cpu_to_be64(addr + mapping_size),
                        };
                        umr_wqe->inline_ksms[(i << 2) + 2] = (struct mlx5_ksm) {
                                .key = rq->mkey_be,
                                .va = cpu_to_be64(addr + mapping_size * 2),
                        };
                        umr_wqe->inline_ksms[(i << 2) + 3] = (struct mlx5_ksm) {
                                .key = rq->mkey_be,
                                .va = cpu_to_be64(rq->wqe_overflow.addr),
                        };
                        mxbuf->rq = rq;
                }
        } else {
                __be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
                                              rq->xsk_pool->chunk_size);
                __be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);

                for (i = 0; i < batch; i++) {
                        struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(xsk_buffs[i]);
                        dma_addr_t addr = xsk_buff_xdp_get_frame_dma(xsk_buffs[i]);

                        umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
                                .key = rq->mkey_be,
                                .va = cpu_to_be64(addr),
                                .bcount = frame_size,
                        };
                        umr_wqe->inline_klms[(i << 1) + 1] = (struct mlx5_klm) {
                                .key = rq->mkey_be,
                                .va = cpu_to_be64(rq->wqe_overflow.addr),
                                .bcount = pad_size,
                        };
                        mxbuf->rq = rq;
                }
        }

        bitmap_zero(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe);
        wi->consumed_strides = 0;

        umr_wqe->ctrl.opmod_idx_opcode =
                cpu_to_be32((icosq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR);

        /* Optimized for speed: keep in sync with mlx5e_mpwrq_umr_entry_size. */
        offset = ix * rq->mpwqe.mtts_per_wqe;
        if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED))
                offset = offset * sizeof(struct mlx5_mtt) / MLX5_OCTWORD;
        else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_OVERSIZED))
                offset = offset * sizeof(struct mlx5_klm) * 2 / MLX5_OCTWORD;
        else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE))
                offset = offset * sizeof(struct mlx5_ksm) * 4 / MLX5_OCTWORD;
        umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);

        icosq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
                .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX,
                .num_wqebbs = rq->mpwqe.umr_wqebbs,
                .umr.rq = rq,
        };

        icosq->pc += rq->mpwqe.umr_wqebbs;

        icosq->doorbell_cseg = &umr_wqe->ctrl;

        return 0;

err_reuse_batch:
        while (--batch >= 0)
                xsk_buff_free(xsk_buffs[batch]);

err:
        rq->stats->buff_alloc_err++;
        return -ENOMEM;
}

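/* Batched XSK frame allocation for the legacy (cyclic) RQ: allocate up to
 * wqe_bulk frames starting at index ix, splitting the batch in two when it
 * wraps around the end of the WQ, and write each frame's DMA address into the
 * corresponding RX WQE. Returns the number of WQEs actually filled.
 */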
int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
        struct mlx5_wq_cyc *wq = &rq->wqe.wq;
        struct xdp_buff **buffs;
        u32 contig, alloc;
        int i;

        /* Each rq->wqe.frags->xskp is 1:1 mapped to an element inside the
         * rq->wqe.alloc_units->xsk_buffs array allocated here.
         */
        buffs = rq->wqe.alloc_units->xsk_buffs;
        contig = mlx5_wq_cyc_get_size(wq) - ix;
        if (wqe_bulk <= contig) {
                alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, wqe_bulk);
        } else {
                alloc = xsk_buff_alloc_batch(rq->xsk_pool, buffs + ix, contig);
                if (likely(alloc == contig))
                        alloc += xsk_buff_alloc_batch(rq->xsk_pool, buffs, wqe_bulk - contig);
        }

        for (i = 0; i < alloc; i++) {
                int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
                struct mlx5e_wqe_frag_info *frag;
                struct mlx5e_rx_wqe_cyc *wqe;
                dma_addr_t addr;

                wqe = mlx5_wq_cyc_get_wqe(wq, j);
                /* Assumes log_num_frags == 0. */
                frag = &rq->wqe.frags[j];

                addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
                wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
                frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
        }

        return alloc;
}

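/* Non-batched fallback: allocate XSK frames one by one and fill the RX WQEs,
 * stopping at the first allocation failure. Returns the number of WQEs filled.
 */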
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
{
        struct mlx5_wq_cyc *wq = &rq->wqe.wq;
        int i;

        for (i = 0; i < wqe_bulk; i++) {
                int j = mlx5_wq_cyc_ctr2ix(wq, ix + i);
                struct mlx5e_wqe_frag_info *frag;
                struct mlx5e_rx_wqe_cyc *wqe;
                dma_addr_t addr;

                wqe = mlx5_wq_cyc_get_wqe(wq, j);
                /* Assumes log_num_frags == 0. */
                frag = &rq->wqe.frags[j];

                *frag->xskp = xsk_buff_alloc(rq->xsk_pool);
                if (unlikely(!*frag->xskp))
                        return i;

                addr = xsk_buff_xdp_get_frame_dma(*frag->xskp);
                wqe->data[0].addr = cpu_to_be64(addr + rq->buff.headroom);
                frag->flags &= ~BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
        }

        return wqe_bulk;
}

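/* XDP_PASS path: allocate a new SKB and copy the frame contents, including any
 * XDP metadata preceding the payload, out of the UMEM so that the XSK frame
 * can be recycled.
 */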
static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_buff *xdp)
{
        u32 totallen = xdp->data_end - xdp->data_meta;
        u32 metalen = xdp->data - xdp->data_meta;
        struct sk_buff *skb;

        skb = napi_alloc_skb(rq->cq.napi, totallen);
        if (unlikely(!skb)) {
                rq->stats->buff_alloc_err++;
                return NULL;
        }

        skb_put_data(skb, xdp->data_meta, totallen);

        if (metalen) {
                skb_metadata_set(skb, metalen);
                __skb_pull(skb, metalen);
        }

        return skb;
}

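/* Handle a completion on the XSK MPWQE RX path: run the XDP program on the
 * frame and, on XDP_PASS, copy the payload into a newly allocated SKB. Returns
 * NULL when the packet was consumed by XDP or dropped.
 */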
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
                                                    struct mlx5e_mpw_info *wi,
                                                    struct mlx5_cqe64 *cqe,
                                                    u16 cqe_bcnt,
                                                    u32 head_offset,
                                                    u32 page_idx)
{
        struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units.xsk_buffs[page_idx]);
        struct bpf_prog *prog;

        /* Check packet size. Note LRO doesn't use linear SKB */
        if (unlikely(cqe_bcnt > rq->hw_mtu)) {
                rq->stats->oversize_pkts_sw_drop++;
                return NULL;
        }

        /* head_offset is not used in this function, because xdp->data and the
         * DMA address point directly to the necessary place. Furthermore, in
         * the current implementation, UMR pages are mapped to XSK frames, so
         * head_offset should always be 0.
         */
        WARN_ON_ONCE(head_offset);

        /* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
        mxbuf->cqe = cqe;
        xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
        xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
        net_prefetch(mxbuf->xdp.data);

        /* Possible flows:
         * - XDP_REDIRECT to XSKMAP:
         *   The page is owned by the userspace from now.
         * - XDP_TX and other XDP_REDIRECTs:
         *   The page was returned by ZCA and recycled.
         * - XDP_DROP:
         *   Recycle the page.
         * - XDP_PASS:
         *   Allocate an SKB, copy the data and recycle the page.
         *
         * Pages to be recycled go to the Reuse Ring on MPWQE deallocation. Its
         * size is the same as the Driver RX Ring's size, and pages for WQEs are
         * allocated first from the Reuse Ring, so it has enough space.
         */

        prog = rcu_dereference(rq->xdp_prog);
        if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
                if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
                        __set_bit(page_idx, wi->skip_release_bitmap); /* non-atomic */
                return NULL; /* page/packet was consumed by XDP */
        }

        /* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
         * frame. On SKB allocation failure, NULL is returned.
         */
        return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}

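/* Same as above, but for the legacy (non-MPWQE) RQ: run the XDP program and,
 * on XDP_PASS, copy the frame into a newly allocated SKB. Returns NULL when
 * the packet was consumed by XDP or the SKB allocation failed.
 */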
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
                                              struct mlx5e_wqe_frag_info *wi,
                                              struct mlx5_cqe64 *cqe,
                                              u32 cqe_bcnt)
{
        struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(*wi->xskp);
        struct bpf_prog *prog;

        /* wi->offset is not used in this function, because xdp->data and the
         * DMA address point directly to the necessary place. Furthermore, the
         * XSK allocator allocates frames per packet, instead of pages, so
         * wi->offset should always be 0.
         */
        WARN_ON_ONCE(wi->offset);

        /* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
        mxbuf->cqe = cqe;
        xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
        xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
        net_prefetch(mxbuf->xdp.data);

        prog = rcu_dereference(rq->xdp_prog);
        if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf)))
                return NULL; /* page/packet was consumed by XDP */

        /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
         * will be handled by mlx5e_free_rx_wqe.
         * On SKB allocation failure, NULL is returned.
         */
        return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
}