// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
/* Copyright (C) 2015-2019 Netronome Systems, Inc. */

#include <linux/bpf_trace.h>
#include <linux/netdevice.h>
#include <linux/bitfield.h>
#include <net/xfrm.h>

#include "../nfp_app.h"
#include "../nfp_net.h"
#include "../nfp_net_dp.h"
#include "../nfp_net_xsk.h"
#include "../crypto/crypto.h"
#include "../crypto/fw.h"
#include "nfd3.h"

/* Transmit processing
 *
 * One queue controller peripheral queue is used for transmit. The
 * driver en-queues packets for transmit by advancing the write
 * pointer. The device indicates that packets have been transmitted by
 * advancing the read pointer. The driver maintains a local copy of
 * the read and write pointer in @struct nfp_net_tx_ring. The driver
 * keeps @wr_p in sync with the queue controller write pointer and can
 * determine how many packets have been transmitted by comparing its
 * copy of the read pointer @rd_p with the read pointer maintained by
 * the queue controller peripheral.
 */

/* Wrappers for deciding when to stop and restart TX queues */
static int nfp_nfd3_tx_ring_should_wake(struct nfp_net_tx_ring *tx_ring)
{
        return !nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS * 4);
}

static int nfp_nfd3_tx_ring_should_stop(struct nfp_net_tx_ring *tx_ring)
{
        return nfp_net_tx_full(tx_ring, MAX_SKB_FRAGS + 1);
}

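/* Note the asymmetric thresholds above: the ring is stopped once there
 * is no longer room for a worst-case packet (head plus MAX_SKB_FRAGS
 * fragments), but is only woken again once space for four such packets
 * is free. The gap keeps the queue from rapidly bouncing between the
 * stopped and awake states.
 */
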
/**
 * nfp_nfd3_tx_ring_stop() - stop tx ring
 * @nd_q: netdev queue
 * @tx_ring: driver tx queue structure
 *
 * Safely stop TX ring. Remember that while we are running .start_xmit()
 * someone else may be cleaning the TX ring completions so we need to be
 * extra careful here.
 */
static void
nfp_nfd3_tx_ring_stop(struct netdev_queue *nd_q,
                      struct nfp_net_tx_ring *tx_ring)
{
        netif_tx_stop_queue(nd_q);

        /* We can race with the TX completion out of NAPI so recheck */
        smp_mb();
        if (unlikely(nfp_nfd3_tx_ring_should_wake(tx_ring)))
                netif_tx_start_queue(nd_q);
}

/**
 * nfp_nfd3_tx_tso() - Set up Tx descriptor for LSO
 * @r_vec: per-ring structure
 * @txbuf: Pointer to driver soft TX descriptor
 * @txd: Pointer to HW TX descriptor
 * @skb: Pointer to SKB
 * @md_bytes: Prepend length
 *
 * Set up Tx descriptor for LSO, do nothing for non-LSO skbs.
 */
static void
nfp_nfd3_tx_tso(struct nfp_net_r_vector *r_vec, struct nfp_nfd3_tx_buf *txbuf,
                struct nfp_nfd3_tx_desc *txd, struct sk_buff *skb, u32 md_bytes)
{
        u32 l3_offset, l4_offset, hdrlen;
        u16 mss;

        if (!skb_is_gso(skb))
                return;

        if (!skb->encapsulation) {
                l3_offset = skb_network_offset(skb);
                l4_offset = skb_transport_offset(skb);
                hdrlen = skb_tcp_all_headers(skb);
        } else {
                l3_offset = skb_inner_network_offset(skb);
                l4_offset = skb_inner_transport_offset(skb);
                hdrlen = skb_inner_tcp_all_headers(skb);
        }

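        /* LSO replicates the packet headers in every segment the device
         * generates; add those extra header bytes to real_len so the TX
         * byte statistics match what actually goes on the wire.
         */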
        txbuf->pkt_cnt = skb_shinfo(skb)->gso_segs;
        txbuf->real_len += hdrlen * (txbuf->pkt_cnt - 1);

        mss = skb_shinfo(skb)->gso_size & NFD3_DESC_TX_MSS_MASK;
        txd->l3_offset = l3_offset - md_bytes;
        txd->l4_offset = l4_offset - md_bytes;
        txd->lso_hdrlen = hdrlen - md_bytes;
        txd->mss = cpu_to_le16(mss);
        txd->flags |= NFD3_DESC_TX_LSO;

        u64_stats_update_begin(&r_vec->tx_sync);
        r_vec->tx_lso++;
        u64_stats_update_end(&r_vec->tx_sync);
}

/**
 * nfp_nfd3_tx_csum() - Set TX CSUM offload flags in TX descriptor
 * @dp: NFP Net data path struct
 * @r_vec: per-ring structure
 * @txbuf: Pointer to driver soft TX descriptor
 * @txd: Pointer to TX descriptor
 * @skb: Pointer to SKB
 *
 * This function sets the TX checksum flags in the TX descriptor based
 * on the configuration and the protocol of the packet to be transmitted.
 */
static void
nfp_nfd3_tx_csum(struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
                 struct nfp_nfd3_tx_buf *txbuf, struct nfp_nfd3_tx_desc *txd,
                 struct sk_buff *skb)
{
        struct ipv6hdr *ipv6h;
        struct iphdr *iph;
        u8 l4_hdr;

        if (!(dp->ctrl & NFP_NET_CFG_CTRL_TXCSUM))
                return;

        if (skb->ip_summed != CHECKSUM_PARTIAL)
                return;

        txd->flags |= NFD3_DESC_TX_CSUM;
        if (skb->encapsulation)
                txd->flags |= NFD3_DESC_TX_ENCAP;

        iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
        ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb) : ipv6_hdr(skb);

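        /* iph and ipv6h alias the same header bytes; the version field
         * sits at the same offset in both, so it safely tells us which
         * interpretation applies.
         */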
        if (iph->version == 4) {
                txd->flags |= NFD3_DESC_TX_IP4_CSUM;
                l4_hdr = iph->protocol;
        } else if (ipv6h->version == 6) {
                l4_hdr = ipv6h->nexthdr;
        } else {
                nn_dp_warn(dp, "partial checksum but ipv=%x!\n", iph->version);
                return;
        }

        switch (l4_hdr) {
        case IPPROTO_TCP:
                txd->flags |= NFD3_DESC_TX_TCP_CSUM;
                break;
        case IPPROTO_UDP:
                txd->flags |= NFD3_DESC_TX_UDP_CSUM;
                break;
        default:
                nn_dp_warn(dp, "partial checksum but l4 proto=%x!\n", l4_hdr);
                return;
        }

        u64_stats_update_begin(&r_vec->tx_sync);
        if (skb->encapsulation)
                r_vec->hw_csum_tx_inner += txbuf->pkt_cnt;
        else
                r_vec->hw_csum_tx += txbuf->pkt_cnt;
        u64_stats_update_end(&r_vec->tx_sync);
}

static int nfp_nfd3_prep_tx_meta(struct nfp_net_dp *dp, struct sk_buff *skb,
                                 u64 tls_handle, bool *ipsec)
{
        struct metadata_dst *md_dst = skb_metadata_dst(skb);
        struct nfp_ipsec_offload offload_info;
        unsigned char *data;
        bool vlan_insert;
        u32 meta_id = 0;
        int md_bytes;

#ifdef CONFIG_NFP_NET_IPSEC
        if (xfrm_offload(skb))
                *ipsec = nfp_net_ipsec_tx_prep(dp, skb, &offload_info);
#endif

        if (unlikely(md_dst && md_dst->type != METADATA_HW_PORT_MUX))
                md_dst = NULL;

        vlan_insert = skb_vlan_tag_present(skb) && (dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN_V2);

        if (!(md_dst || tls_handle || vlan_insert || *ipsec))
                return 0;

        md_bytes = sizeof(meta_id) +
                   (!!md_dst ? NFP_NET_META_PORTID_SIZE : 0) +
                   (!!tls_handle ? NFP_NET_META_CONN_HANDLE_SIZE : 0) +
                   (vlan_insert ? NFP_NET_META_VLAN_SIZE : 0) +
                   (*ipsec ? NFP_NET_META_IPSEC_FIELD_SIZE : 0);

        if (unlikely(skb_cow_head(skb, md_bytes)))
                return -ENOMEM;

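        /* Metadata fields are written back to front while meta_id
         * collects one 4-bit type tag per field: each new tag lands in
         * the low bits and pushes earlier tags up, so a reader walking
         * the tags from the low bits visits the fields in memory order.
         */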
        data = skb_push(skb, md_bytes) + md_bytes;
        if (md_dst) {
                data -= NFP_NET_META_PORTID_SIZE;
                put_unaligned_be32(md_dst->u.port_info.port_id, data);
                meta_id = NFP_NET_META_PORTID;
        }
        if (tls_handle) {
                /* conn handle is opaque, we just use u64 to be able to quickly
                 * compare it to zero
                 */
                data -= NFP_NET_META_CONN_HANDLE_SIZE;
                memcpy(data, &tls_handle, sizeof(tls_handle));
                meta_id <<= NFP_NET_META_FIELD_SIZE;
                meta_id |= NFP_NET_META_CONN_HANDLE;
        }
        if (vlan_insert) {
                data -= NFP_NET_META_VLAN_SIZE;
                /* skb->vlan_proto is already __be16, so it can be copied
                 * into the metadata without calling put_unaligned_be16()
                 */
                memcpy(data, &skb->vlan_proto, sizeof(skb->vlan_proto));
                put_unaligned_be16(skb_vlan_tag_get(skb), data + sizeof(skb->vlan_proto));
                meta_id <<= NFP_NET_META_FIELD_SIZE;
                meta_id |= NFP_NET_META_VLAN;
        }
        if (*ipsec) {
                data -= NFP_NET_META_IPSEC_SIZE;
                put_unaligned_be32(offload_info.seq_hi, data);
                data -= NFP_NET_META_IPSEC_SIZE;
                put_unaligned_be32(offload_info.seq_low, data);
                data -= NFP_NET_META_IPSEC_SIZE;
                put_unaligned_be32(offload_info.handle - 1, data);
                meta_id <<= NFP_NET_META_IPSEC_FIELD_SIZE;
                meta_id |= NFP_NET_META_IPSEC << 8 | NFP_NET_META_IPSEC << 4 | NFP_NET_META_IPSEC;
        }

        data -= sizeof(meta_id);
        put_unaligned_be32(meta_id, data);

        return md_bytes;
}

/**
 * nfp_nfd3_tx() - Main transmit entry point
 * @skb: SKB to transmit
 * @netdev: netdev structure
 *
 * Return: NETDEV_TX_OK on success.
 */
netdev_tx_t nfp_nfd3_tx(struct sk_buff *skb, struct net_device *netdev)
{
        struct nfp_net *nn = netdev_priv(netdev);
        int f, nr_frags, wr_idx, md_bytes;
        struct nfp_net_tx_ring *tx_ring;
        struct nfp_net_r_vector *r_vec;
        struct nfp_nfd3_tx_buf *txbuf;
        struct nfp_nfd3_tx_desc *txd;
        struct netdev_queue *nd_q;
        const skb_frag_t *frag;
        struct nfp_net_dp *dp;
        dma_addr_t dma_addr;
        unsigned int fsize;
        u64 tls_handle = 0;
        bool ipsec = false;
        u16 qidx;

        dp = &nn->dp;
        qidx = skb_get_queue_mapping(skb);
        tx_ring = &dp->tx_rings[qidx];
        r_vec = tx_ring->r_vec;

        nr_frags = skb_shinfo(skb)->nr_frags;

        if (unlikely(nfp_net_tx_full(tx_ring, nr_frags + 1))) {
                nn_dp_warn(dp, "TX ring %d busy. wrp=%u rdp=%u\n",
                           qidx, tx_ring->wr_p, tx_ring->rd_p);
                nd_q = netdev_get_tx_queue(dp->netdev, qidx);
                netif_tx_stop_queue(nd_q);
                nfp_net_tx_xmit_more_flush(tx_ring);
                u64_stats_update_begin(&r_vec->tx_sync);
                r_vec->tx_busy++;
                u64_stats_update_end(&r_vec->tx_sync);
                return NETDEV_TX_BUSY;
        }

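        /* TLS offload may replace the skb entirely; a NULL return means
         * it was consumed and there is nothing left to send, otherwise
         * nr_frags has been refreshed for the (possibly new) skb.
         */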
        skb = nfp_net_tls_tx(dp, r_vec, skb, &tls_handle, &nr_frags);
        if (unlikely(!skb)) {
                nfp_net_tx_xmit_more_flush(tx_ring);
                return NETDEV_TX_OK;
        }

        md_bytes = nfp_nfd3_prep_tx_meta(dp, skb, tls_handle, &ipsec);
        if (unlikely(md_bytes < 0))
                goto err_flush;

        /* Start with the head skbuf */
        dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
                                  DMA_TO_DEVICE);
        if (dma_mapping_error(dp->dev, dma_addr))
                goto err_dma_err;

        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

        /* Stash the soft descriptor of the head then initialize it */
        txbuf = &tx_ring->txbufs[wr_idx];
        txbuf->skb = skb;
        txbuf->dma_addr = dma_addr;
        txbuf->fidx = -1;
        txbuf->pkt_cnt = 1;
        txbuf->real_len = skb->len;

        /* Build TX descriptor */
        txd = &tx_ring->txds[wr_idx];
        txd->offset_eop = (nr_frags ? 0 : NFD3_DESC_TX_EOP) | md_bytes;
        txd->dma_len = cpu_to_le16(skb_headlen(skb));
        nfp_desc_set_dma_addr_40b(txd, dma_addr);
        txd->data_len = cpu_to_le16(skb->len);

        txd->flags = 0;
        txd->mss = 0;
        txd->lso_hdrlen = 0;

        /* Do not reorder - tso may adjust pkt cnt, vlan may override fields */
        nfp_nfd3_tx_tso(r_vec, txbuf, txd, skb, md_bytes);
        if (ipsec)
                nfp_nfd3_ipsec_tx(txd, skb);
        else
                nfp_nfd3_tx_csum(dp, r_vec, txbuf, txd, skb);
        if (skb_vlan_tag_present(skb) && dp->ctrl & NFP_NET_CFG_CTRL_TXVLAN) {
                txd->flags |= NFD3_DESC_TX_VLAN;
                txd->vlan = cpu_to_le16(skb_vlan_tag_get(skb));
        }

        /* Gather DMA */
        if (nr_frags > 0) {
                __le64 second_half;

                /* all descs must match except for DMA addr, length and EOP */
                second_half = txd->vals8[1];

                for (f = 0; f < nr_frags; f++) {
                        frag = &skb_shinfo(skb)->frags[f];
                        fsize = skb_frag_size(frag);

                        dma_addr = skb_frag_dma_map(dp->dev, frag, 0,
                                                    fsize, DMA_TO_DEVICE);
                        if (dma_mapping_error(dp->dev, dma_addr))
                                goto err_unmap;

                        wr_idx = D_IDX(tx_ring, wr_idx + 1);
                        tx_ring->txbufs[wr_idx].skb = skb;
                        tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
                        tx_ring->txbufs[wr_idx].fidx = f;

                        txd = &tx_ring->txds[wr_idx];
                        txd->dma_len = cpu_to_le16(fsize);
                        nfp_desc_set_dma_addr_40b(txd, dma_addr);
                        txd->offset_eop = md_bytes |
                                ((f == nr_frags - 1) ? NFD3_DESC_TX_EOP : 0);
                        txd->vals8[1] = second_half;
                }

                u64_stats_update_begin(&r_vec->tx_sync);
                r_vec->tx_gather++;
                u64_stats_update_end(&r_vec->tx_sync);
        }

        skb_tx_timestamp(skb);

        nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);

        tx_ring->wr_p += nr_frags + 1;
        if (nfp_nfd3_tx_ring_should_stop(tx_ring))
                nfp_nfd3_tx_ring_stop(nd_q, tx_ring);

        tx_ring->wr_ptr_add += nr_frags + 1;
        if (__netdev_tx_sent_queue(nd_q, txbuf->real_len, netdev_xmit_more()))
                nfp_net_tx_xmit_more_flush(tx_ring);

        return NETDEV_TX_OK;

err_unmap:
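        /* Unwind in reverse order: unmap any fragments mapped so far,
         * then the head buffer, clearing each soft descriptor as we go.
         */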
        while (--f >= 0) {
                frag = &skb_shinfo(skb)->frags[f];
                dma_unmap_page(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
                               skb_frag_size(frag), DMA_TO_DEVICE);
                tx_ring->txbufs[wr_idx].skb = NULL;
                tx_ring->txbufs[wr_idx].dma_addr = 0;
                tx_ring->txbufs[wr_idx].fidx = -2;
                wr_idx = wr_idx - 1;
                if (wr_idx < 0)
                        wr_idx += tx_ring->cnt;
        }
        dma_unmap_single(dp->dev, tx_ring->txbufs[wr_idx].dma_addr,
                         skb_headlen(skb), DMA_TO_DEVICE);
        tx_ring->txbufs[wr_idx].skb = NULL;
        tx_ring->txbufs[wr_idx].dma_addr = 0;
        tx_ring->txbufs[wr_idx].fidx = -2;
err_dma_err:
        nn_dp_warn(dp, "Failed to map DMA TX buffer\n");
err_flush:
        nfp_net_tx_xmit_more_flush(tx_ring);
        u64_stats_update_begin(&r_vec->tx_sync);
        r_vec->tx_errors++;
        u64_stats_update_end(&r_vec->tx_sync);
        nfp_net_tls_tx_undo(skb, tls_handle);
        dev_kfree_skb_any(skb);
        return NETDEV_TX_OK;
}

/**
 * nfp_nfd3_tx_complete() - Handle completed TX packets
 * @tx_ring: TX ring structure
 * @budget: NAPI budget (only used as bool to determine if in NAPI context)
 */
void nfp_nfd3_tx_complete(struct nfp_net_tx_ring *tx_ring, int budget)
{
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
        u32 done_pkts = 0, done_bytes = 0;
        struct netdev_queue *nd_q;
        u32 qcp_rd_p;
        int todo;

        if (tx_ring->wr_p == tx_ring->rd_p)
                return;

        /* Work out how many descriptors have been transmitted */
        qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);

        if (qcp_rd_p == tx_ring->qcp_rd_p)
                return;

        todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

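        /* Each iteration retires one descriptor. Gather packets occupy
         * several consecutive descriptors that all point at the same skb;
         * only the head descriptor (fidx == -1) carries the packet and
         * byte counts, and the skb is freed on its last fragment.
         */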
        while (todo--) {
                const skb_frag_t *frag;
                struct nfp_nfd3_tx_buf *tx_buf;
                struct sk_buff *skb;
                int fidx, nr_frags;
                int idx;

                idx = D_IDX(tx_ring, tx_ring->rd_p++);
                tx_buf = &tx_ring->txbufs[idx];

                skb = tx_buf->skb;
                if (!skb)
                        continue;

                nr_frags = skb_shinfo(skb)->nr_frags;
                fidx = tx_buf->fidx;

                if (fidx == -1) {
                        /* unmap head */
                        dma_unmap_single(dp->dev, tx_buf->dma_addr,
                                         skb_headlen(skb), DMA_TO_DEVICE);

                        done_pkts += tx_buf->pkt_cnt;
                        done_bytes += tx_buf->real_len;
                } else {
                        /* unmap fragment */
                        frag = &skb_shinfo(skb)->frags[fidx];
                        dma_unmap_page(dp->dev, tx_buf->dma_addr,
                                       skb_frag_size(frag), DMA_TO_DEVICE);
                }

                /* check for last gather fragment */
                if (fidx == nr_frags - 1)
                        napi_consume_skb(skb, budget);

                tx_buf->dma_addr = 0;
                tx_buf->skb = NULL;
                tx_buf->fidx = -2;
        }

        tx_ring->qcp_rd_p = qcp_rd_p;

        u64_stats_update_begin(&r_vec->tx_sync);
        r_vec->tx_bytes += done_bytes;
        r_vec->tx_pkts += done_pkts;
        u64_stats_update_end(&r_vec->tx_sync);

        if (!dp->netdev)
                return;

        nd_q = netdev_get_tx_queue(dp->netdev, tx_ring->idx);
        netdev_tx_completed_queue(nd_q, done_pkts, done_bytes);
        if (nfp_nfd3_tx_ring_should_wake(tx_ring)) {
                /* Make sure TX thread will see updated tx_ring->rd_p */
                smp_mb();

                if (unlikely(netif_tx_queue_stopped(nd_q)))
                        netif_tx_wake_queue(nd_q);
        }

        WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
                  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
                  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
}

static bool nfp_nfd3_xdp_complete(struct nfp_net_tx_ring *tx_ring)
{
        struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
        u32 done_pkts = 0, done_bytes = 0;
        bool done_all;
        int idx, todo;
        u32 qcp_rd_p;

        /* Work out how many descriptors have been transmitted */
        qcp_rd_p = nfp_net_read_tx_cmpl(tx_ring, dp);

        if (qcp_rd_p == tx_ring->qcp_rd_p)
                return true;

        todo = D_IDX(tx_ring, qcp_rd_p - tx_ring->qcp_rd_p);

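        /* Finish at most NFP_NET_XDP_MAX_COMPLETE descriptors per call;
         * if more remain we return !done_all so the caller keeps the
         * NAPI poll going.
         */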
        done_all = todo <= NFP_NET_XDP_MAX_COMPLETE;
        todo = min(todo, NFP_NET_XDP_MAX_COMPLETE);

        tx_ring->qcp_rd_p = D_IDX(tx_ring, tx_ring->qcp_rd_p + todo);

        done_pkts = todo;
        while (todo--) {
                idx = D_IDX(tx_ring, tx_ring->rd_p);
                tx_ring->rd_p++;

                done_bytes += tx_ring->txbufs[idx].real_len;
        }

        u64_stats_update_begin(&r_vec->tx_sync);
        r_vec->tx_bytes += done_bytes;
        r_vec->tx_pkts += done_pkts;
        u64_stats_update_end(&r_vec->tx_sync);

        WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
                  "XDP TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
                  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);

        return done_all;
}

/* Receive processing
 */

static void *
nfp_nfd3_napi_alloc_one(struct nfp_net_dp *dp, dma_addr_t *dma_addr)
{
        void *frag;

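        /* With an XDP program attached each buffer gets its own page so
         * the program can freely adjust head/tail room; otherwise the
         * cheaper NAPI page-fragment allocator is used.
         */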
        if (!dp->xdp_prog) {
                frag = napi_alloc_frag(dp->fl_bufsz);
                if (unlikely(!frag))
                        return NULL;
        } else {
                struct page *page;

                page = dev_alloc_page();
                if (unlikely(!page))
                        return NULL;
                frag = page_address(page);
        }

        *dma_addr = nfp_net_dma_map_rx(dp, frag);
        if (dma_mapping_error(dp->dev, *dma_addr)) {
                nfp_net_free_frag(frag, dp->xdp_prog);
                nn_dp_warn(dp, "Failed to map DMA RX buffer\n");
                return NULL;
        }

        return frag;
}

/**
 * nfp_nfd3_rx_give_one() - Put mapped skb on the software and hardware rings
 * @dp: NFP Net data path struct
 * @rx_ring: RX ring structure
 * @frag: page fragment buffer
 * @dma_addr: DMA address of skb mapping
 */
static void
nfp_nfd3_rx_give_one(const struct nfp_net_dp *dp,
                     struct nfp_net_rx_ring *rx_ring,
                     void *frag, dma_addr_t dma_addr)
{
        unsigned int wr_idx;

        wr_idx = D_IDX(rx_ring, rx_ring->wr_p);

        nfp_net_dma_sync_dev_rx(dp, dma_addr);

        /* Stash SKB and DMA address away */
        rx_ring->rxbufs[wr_idx].frag = frag;
        rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;

        /* Fill freelist descriptor */
        rx_ring->rxds[wr_idx].fld.reserved = 0;
        rx_ring->rxds[wr_idx].fld.meta_len_dd = 0;
        /* DMA address is expanded to 48-bit width in freelist for NFP3800,
         * so the *_48b macro is used accordingly, it's also OK to fill
         * a 40-bit address since the top 8 bits get set to 0.
         */
        nfp_desc_set_dma_addr_48b(&rx_ring->rxds[wr_idx].fld,
                                  dma_addr + dp->rx_dma_off);

        rx_ring->wr_p++;
        if (!(rx_ring->wr_p % NFP_NET_FL_BATCH)) {
                /* Update write pointer of the freelist queue. Make
                 * sure all writes are flushed before telling the hardware.
                 */
                wmb();
                nfp_qcp_wr_ptr_add(rx_ring->qcp_fl, NFP_NET_FL_BATCH);
        }
}

/**
 * nfp_nfd3_rx_ring_fill_freelist() - Give buffers from the ring to FW
 * @dp: NFP Net data path struct
 * @rx_ring: RX ring to fill
 */
void nfp_nfd3_rx_ring_fill_freelist(struct nfp_net_dp *dp,
                                    struct nfp_net_rx_ring *rx_ring)
{
        unsigned int i;

        if (nfp_net_has_xsk_pool_slow(dp, rx_ring->idx))
                return nfp_net_xsk_rx_ring_fill_freelist(rx_ring);

        for (i = 0; i < rx_ring->cnt - 1; i++)
                nfp_nfd3_rx_give_one(dp, rx_ring, rx_ring->rxbufs[i].frag,
                                     rx_ring->rxbufs[i].dma_addr);
}

/**
 * nfp_nfd3_rx_csum_has_errors() - group check if rxd has any csum errors
 * @flags: RX descriptor flags field in CPU byte order
 */
static int nfp_nfd3_rx_csum_has_errors(u16 flags)
{
        u16 csum_all_checked, csum_all_ok;

        csum_all_checked = flags & __PCIE_DESC_RX_CSUM_ALL;
        csum_all_ok = flags & __PCIE_DESC_RX_CSUM_ALL_OK;

        return csum_all_checked != (csum_all_ok << PCIE_DESC_RX_CSUM_OK_SHIFT);
}

/**
 * nfp_nfd3_rx_csum() - set SKB checksum field based on RX descriptor flags
 * @dp: NFP Net data path struct
 * @r_vec: per-ring structure
 * @rxd: Pointer to RX descriptor
 * @meta: Parsed metadata prepend
 * @skb: Pointer to SKB
 */
void
nfp_nfd3_rx_csum(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
                 const struct nfp_net_rx_desc *rxd,
                 const struct nfp_meta_parsed *meta, struct sk_buff *skb)
{
        skb_checksum_none_assert(skb);

        if (!(dp->netdev->features & NETIF_F_RXCSUM))
                return;

        if (meta->csum_type) {
                skb->ip_summed = meta->csum_type;
                skb->csum = meta->csum;
                u64_stats_update_begin(&r_vec->rx_sync);
                r_vec->hw_csum_rx_complete++;
                u64_stats_update_end(&r_vec->rx_sync);
                return;
        }

        if (nfp_nfd3_rx_csum_has_errors(le16_to_cpu(rxd->rxd.flags))) {
                u64_stats_update_begin(&r_vec->rx_sync);
                r_vec->hw_csum_rx_error++;
                u64_stats_update_end(&r_vec->rx_sync);
                return;
        }

        /* Assume that the firmware will never report inner CSUM_OK unless outer
         * L4 headers were successfully parsed. FW will always report zero UDP
         * checksum as CSUM_OK.
         */
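        /* The first call below marks the skb CHECKSUM_UNNECESSARY for a
         * verified outer L4 checksum; the second bumps skb->csum_level
         * if the inner checksum checked out as well.
         */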
        if (rxd->rxd.flags & PCIE_DESC_RX_TCP_CSUM_OK ||
            rxd->rxd.flags & PCIE_DESC_RX_UDP_CSUM_OK) {
                __skb_incr_checksum_unnecessary(skb);
                u64_stats_update_begin(&r_vec->rx_sync);
                r_vec->hw_csum_rx_ok++;
                u64_stats_update_end(&r_vec->rx_sync);
        }

        if (rxd->rxd.flags & PCIE_DESC_RX_I_TCP_CSUM_OK ||
            rxd->rxd.flags & PCIE_DESC_RX_I_UDP_CSUM_OK) {
                __skb_incr_checksum_unnecessary(skb);
                u64_stats_update_begin(&r_vec->rx_sync);
                r_vec->hw_csum_rx_inner_ok++;
                u64_stats_update_end(&r_vec->rx_sync);
        }
}

static void
nfp_nfd3_set_hash(struct net_device *netdev, struct nfp_meta_parsed *meta,
                  unsigned int type, __be32 *hash)
{
        if (!(netdev->features & NETIF_F_RXHASH))
                return;

        switch (type) {
        case NFP_NET_RSS_IPV4:
        case NFP_NET_RSS_IPV6:
        case NFP_NET_RSS_IPV6_EX:
                meta->hash_type = PKT_HASH_TYPE_L3;
                break;
        default:
                meta->hash_type = PKT_HASH_TYPE_L4;
                break;
        }

        meta->hash = get_unaligned_be32(hash);
}

static void
nfp_nfd3_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta,
                       void *data, struct nfp_net_rx_desc *rxd)
{
        struct nfp_net_rx_hash *rx_hash = data;

        if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
                return;

        nfp_nfd3_set_hash(netdev, meta, get_unaligned_be32(&rx_hash->hash_type),
                          &rx_hash->hash);
}

bool
nfp_nfd3_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
                    void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
        u32 meta_info, vlan_info;

        meta_info = get_unaligned_be32(data);
        data += 4;

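        /* The leading 32-bit word packs up to eight 4-bit field type
         * tags; walk them from the low bits, consuming each field's
         * payload from @data as its tag is processed.
         */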
        while (meta_info) {
                switch (meta_info & NFP_NET_META_FIELD_MASK) {
                case NFP_NET_META_HASH:
                        meta_info >>= NFP_NET_META_FIELD_SIZE;
                        nfp_nfd3_set_hash(netdev, meta,
                                          meta_info & NFP_NET_META_FIELD_MASK,
                                          (__be32 *)data);
                        data += 4;
                        break;
                case NFP_NET_META_MARK:
                        meta->mark = get_unaligned_be32(data);
                        data += 4;
                        break;
                case NFP_NET_META_VLAN:
                        vlan_info = get_unaligned_be32(data);
                        if (FIELD_GET(NFP_NET_META_VLAN_STRIP, vlan_info)) {
                                meta->vlan.stripped = true;
                                meta->vlan.tpid = FIELD_GET(NFP_NET_META_VLAN_TPID_MASK,
                                                            vlan_info);
                                meta->vlan.tci = FIELD_GET(NFP_NET_META_VLAN_TCI_MASK,
                                                           vlan_info);
                        }
                        data += 4;
                        break;
                case NFP_NET_META_PORTID:
                        meta->portid = get_unaligned_be32(data);
                        data += 4;
                        break;
                case NFP_NET_META_CSUM:
                        meta->csum_type = CHECKSUM_COMPLETE;
                        meta->csum =
                                (__force __wsum)__get_unaligned_cpu32(data);
                        data += 4;
                        break;
                case NFP_NET_META_RESYNC_INFO:
                        if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
                                                      pkt_len))
                                return false;
                        data += sizeof(struct nfp_net_tls_resync_req);
                        break;
#ifdef CONFIG_NFP_NET_IPSEC
                case NFP_NET_META_IPSEC:
                        /* Note: an IPsec packet carries a zero saidx, so
                         * add 1 to mark the packet as IPsec within the
                         * driver.
                         */
                        meta->ipsec_saidx = get_unaligned_be32(data) + 1;
                        data += 4;
                        break;
#endif
                default:
                        return true;
                }

                meta_info >>= NFP_NET_META_FIELD_SIZE;
        }

        return data != pkt;
}

static void
nfp_nfd3_rx_drop(const struct nfp_net_dp *dp, struct nfp_net_r_vector *r_vec,
                 struct nfp_net_rx_ring *rx_ring, struct nfp_net_rx_buf *rxbuf,
                 struct sk_buff *skb)
{
        u64_stats_update_begin(&r_vec->rx_sync);
        r_vec->rx_drops++;
        /* If we have both skb and rxbuf the replacement buffer allocation
         * must have failed, count this as an alloc failure.
         */
        if (skb && rxbuf)
                r_vec->rx_replace_buf_alloc_fail++;
        u64_stats_update_end(&r_vec->rx_sync);

        /* The skb is built around the frag, so freeing the skb would free
         * the frag too; take an extra ref to be able to reuse it.
         */
        if (skb && rxbuf && skb->head == rxbuf->frag)
                page_ref_inc(virt_to_head_page(rxbuf->frag));
        if (rxbuf)
                nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag, rxbuf->dma_addr);
        if (skb)
                dev_kfree_skb_any(skb);
}

static bool
nfp_nfd3_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
                    struct nfp_net_tx_ring *tx_ring,
                    struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
                    unsigned int pkt_len, bool *completed)
{
        unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
        struct nfp_nfd3_tx_buf *txbuf;
        struct nfp_nfd3_tx_desc *txd;
        int wr_idx;

        /* Reject if xdp_adjust_tail grew the packet beyond the DMA area */
        if (pkt_len + dma_off > dma_map_sz)
                return false;

        if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
                if (!*completed) {
                        nfp_nfd3_xdp_complete(tx_ring);
                        *completed = true;
                }

                if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
                        nfp_nfd3_rx_drop(dp, rx_ring->r_vec, rx_ring, rxbuf,
                                         NULL);
                        return false;
                }
        }

        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

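        /* XDP_TX recycles buffers: the frag previously parked in this TX
         * slot goes back onto the RX freelist, and the just-received frag
         * is handed to the TX ring in its place.
         */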
        /* Stash the soft descriptor of the head then initialize it */
        txbuf = &tx_ring->txbufs[wr_idx];

        nfp_nfd3_rx_give_one(dp, rx_ring, txbuf->frag, txbuf->dma_addr);

        txbuf->frag = rxbuf->frag;
        txbuf->dma_addr = rxbuf->dma_addr;
        txbuf->fidx = -1;
        txbuf->pkt_cnt = 1;
        txbuf->real_len = pkt_len;

        dma_sync_single_for_device(dp->dev, rxbuf->dma_addr + dma_off,
                                   pkt_len, DMA_BIDIRECTIONAL);

        /* Build TX descriptor */
        txd = &tx_ring->txds[wr_idx];
        txd->offset_eop = NFD3_DESC_TX_EOP;
        txd->dma_len = cpu_to_le16(pkt_len);
        nfp_desc_set_dma_addr_40b(txd, rxbuf->dma_addr + dma_off);
        txd->data_len = cpu_to_le16(pkt_len);

        txd->flags = 0;
        txd->mss = 0;
        txd->lso_hdrlen = 0;

        tx_ring->wr_p++;
        tx_ring->wr_ptr_add++;
        return true;
}

/**
 * nfp_nfd3_rx() - receive up to @budget packets on @rx_ring
 * @rx_ring: RX ring to receive from
 * @budget: NAPI budget
 *
 * Note, this function is separated out from the napi poll function to
 * more cleanly separate packet receive code from other bookkeeping
 * functions performed in the napi poll function.
 *
 * Return: Number of packets received.
 */
static int nfp_nfd3_rx(struct nfp_net_rx_ring *rx_ring, int budget)
{
        struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
        struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
        struct nfp_net_tx_ring *tx_ring;
        struct bpf_prog *xdp_prog;
        int idx, pkts_polled = 0;
        bool xdp_tx_cmpl = false;
        unsigned int true_bufsz;
        struct sk_buff *skb;
        struct xdp_buff xdp;

        xdp_prog = READ_ONCE(dp->xdp_prog);
        true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
        xdp_init_buff(&xdp, PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM,
                      &rx_ring->xdp_rxq);
        tx_ring = r_vec->xdp_ring;

        while (pkts_polled < budget) {
                unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
                struct nfp_net_rx_buf *rxbuf;
                struct nfp_net_rx_desc *rxd;
                struct nfp_meta_parsed meta;
                bool redir_egress = false;
                struct net_device *netdev;
                dma_addr_t new_dma_addr;
                u32 meta_len_xdp = 0;
                void *new_frag;

                idx = D_IDX(rx_ring, rx_ring->rd_p);

                rxd = &rx_ring->rxds[idx];
                if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
                        break;

                /* Memory barrier to ensure that we won't do other reads
                 * before the DD bit.
                 */
                dma_rmb();

                memset(&meta, 0, sizeof(meta));

                rx_ring->rd_p++;
                pkts_polled++;

                rxbuf = &rx_ring->rxbufs[idx];
                /*           < meta_len >
                 *  <-- [rx_offset] -->
                 *  ---------------------------------------------------------
                 * | [XX] |  metadata  |             packet           | XXXX |
                 *  ---------------------------------------------------------
                 *         <---------------- data_len --------------->
                 *
                 * The rx_offset is fixed for all packets, the meta_len can vary
                 * on a packet by packet basis. If rx_offset is set to zero
                 * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
                 * buffer and is immediately followed by the packet (no [XX]).
                 */
                meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
                data_len = le16_to_cpu(rxd->rxd.data_len);
                pkt_len = data_len - meta_len;

                pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
                if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
                        pkt_off += meta_len;
                else
                        pkt_off += dp->rx_offset;
                meta_off = pkt_off - meta_len;

                /* Stats update */
                u64_stats_update_begin(&r_vec->rx_sync);
                r_vec->rx_pkts++;
                r_vec->rx_bytes += pkt_len;
                u64_stats_update_end(&r_vec->rx_sync);

                if (unlikely(meta_len > NFP_NET_MAX_PREPEND ||
                             (dp->rx_offset && meta_len > dp->rx_offset))) {
                        nn_dp_warn(dp, "oversized RX packet metadata %u\n",
                                   meta_len);
                        nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
                        continue;
                }

                nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off,
                                        data_len);

                if (!dp->chained_metadata_format) {
                        nfp_nfd3_set_hash_desc(dp->netdev, &meta,
                                               rxbuf->frag + meta_off, rxd);
                } else if (meta_len) {
                        if (unlikely(nfp_nfd3_parse_meta(dp->netdev, &meta,
                                                         rxbuf->frag + meta_off,
                                                         rxbuf->frag + pkt_off,
                                                         pkt_len, meta_len))) {
                                nn_dp_warn(dp, "invalid RX packet metadata\n");
                                nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf,
                                                 NULL);
                                continue;
                        }
                }

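                /* Run the XDP program only on packets destined for the
                 * local netdev; traffic for other ports (meta.portid set)
                 * skips XDP and is dispatched below.
                 */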
                if (xdp_prog && !meta.portid) {
                        void *orig_data = rxbuf->frag + pkt_off;
                        unsigned int dma_off;
                        int act;

                        xdp_prepare_buff(&xdp,
                                         rxbuf->frag + NFP_NET_RX_BUF_HEADROOM,
                                         pkt_off - NFP_NET_RX_BUF_HEADROOM,
                                         pkt_len, true);

                        act = bpf_prog_run_xdp(xdp_prog, &xdp);

                        pkt_len = xdp.data_end - xdp.data;
                        pkt_off += xdp.data - orig_data;

                        switch (act) {
                        case XDP_PASS:
                                meta_len_xdp = xdp.data - xdp.data_meta;
                                break;
                        case XDP_TX:
                                dma_off = pkt_off - NFP_NET_RX_BUF_HEADROOM;
                                if (unlikely(!nfp_nfd3_tx_xdp_buf(dp, rx_ring,
                                                                  tx_ring,
                                                                  rxbuf,
                                                                  dma_off,
                                                                  pkt_len,
                                                                  &xdp_tx_cmpl)))
                                        trace_xdp_exception(dp->netdev,
                                                            xdp_prog, act);
                                continue;
                        default:
                                bpf_warn_invalid_xdp_action(dp->netdev, xdp_prog, act);
                                fallthrough;
                        case XDP_ABORTED:
                                trace_xdp_exception(dp->netdev, xdp_prog, act);
                                fallthrough;
                        case XDP_DROP:
                                nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag,
                                                     rxbuf->dma_addr);
                                continue;
                        }
                }

                if (likely(!meta.portid)) {
                        netdev = dp->netdev;
                } else if (meta.portid == NFP_META_PORT_ID_CTRL) {
                        struct nfp_net *nn = netdev_priv(dp->netdev);

                        nfp_app_ctrl_rx_raw(nn->app, rxbuf->frag + pkt_off,
                                            pkt_len);
                        nfp_nfd3_rx_give_one(dp, rx_ring, rxbuf->frag,
                                             rxbuf->dma_addr);
                        continue;
                } else {
                        struct nfp_net *nn;

                        nn = netdev_priv(dp->netdev);
                        netdev = nfp_app_dev_get(nn->app, meta.portid,
                                                 &redir_egress);
                        if (unlikely(!netdev)) {
                                nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf,
                                                 NULL);
                                continue;
                        }

                        if (nfp_netdev_is_nfp_repr(netdev))
                                nfp_repr_inc_rx_stats(netdev, pkt_len);
                }

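                /* Turn the buffer into an skb and immediately put a fresh
                 * frag on the RX ring in its place so the ring never runs
                 * dry.
                 */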
                skb = build_skb(rxbuf->frag, true_bufsz);
                if (unlikely(!skb)) {
                        nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
                        continue;
                }
                new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr);
                if (unlikely(!new_frag)) {
                        nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
                        continue;
                }

                nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

                nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

                skb_reserve(skb, pkt_off);
                skb_put(skb, pkt_len);

                skb->mark = meta.mark;
                skb_set_hash(skb, meta.hash, meta.hash_type);

                skb_record_rx_queue(skb, rx_ring->idx);
                skb->protocol = eth_type_trans(skb, netdev);

                nfp_nfd3_rx_csum(dp, r_vec, rxd, &meta, skb);

#ifdef CONFIG_TLS_DEVICE
                if (rxd->rxd.flags & PCIE_DESC_RX_DECRYPTED) {
                        skb->decrypted = true;
                        u64_stats_update_begin(&r_vec->rx_sync);
                        r_vec->hw_tls_rx++;
                        u64_stats_update_end(&r_vec->rx_sync);
                }
#endif

                if (unlikely(!nfp_net_vlan_strip(skb, rxd, &meta))) {
                        nfp_nfd3_rx_drop(dp, r_vec, rx_ring, NULL, skb);
                        continue;
                }

#ifdef CONFIG_NFP_NET_IPSEC
                if (meta.ipsec_saidx != 0 && unlikely(nfp_net_ipsec_rx(&meta, skb))) {
                        nfp_nfd3_rx_drop(dp, r_vec, rx_ring, NULL, skb);
                        continue;
                }
#endif

                if (meta_len_xdp)
                        skb_metadata_set(skb, meta_len_xdp);

                if (likely(!redir_egress)) {
                        napi_gro_receive(&rx_ring->r_vec->napi, skb);
                } else {
                        skb->dev = netdev;
                        skb_reset_network_header(skb);
                        __skb_push(skb, ETH_HLEN);
                        dev_queue_xmit(skb);
                }
        }

        if (xdp_prog) {
                if (tx_ring->wr_ptr_add)
                        nfp_net_tx_xmit_more_flush(tx_ring);
                else if (unlikely(tx_ring->wr_p != tx_ring->rd_p) &&
                         !xdp_tx_cmpl)
                        if (!nfp_nfd3_xdp_complete(tx_ring))
                                pkts_polled = budget;
        }

        return pkts_polled;
}

/**
 * nfp_nfd3_poll() - napi poll function
 * @napi: NAPI structure
 * @budget: NAPI budget
 *
 * Return: number of packets polled.
 */
int nfp_nfd3_poll(struct napi_struct *napi, int budget)
{
        struct nfp_net_r_vector *r_vec =
                container_of(napi, struct nfp_net_r_vector, napi);
        unsigned int pkts_polled = 0;

        if (r_vec->tx_ring)
                nfp_nfd3_tx_complete(r_vec->tx_ring, budget);
        if (r_vec->rx_ring)
                pkts_polled = nfp_nfd3_rx(r_vec->rx_ring, budget);

        if (pkts_polled < budget)
                if (napi_complete_done(napi, pkts_polled))
                        nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);

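        /* Feed packet and byte counters to the DIM library so it can
         * adapt the interrupt moderation settings to the current load.
         */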
        if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) {
                struct dim_sample dim_sample = {};
                unsigned int start;
                u64 pkts, bytes;

                do {
                        start = u64_stats_fetch_begin(&r_vec->rx_sync);
                        pkts = r_vec->rx_pkts;
                        bytes = r_vec->rx_bytes;
                } while (u64_stats_fetch_retry(&r_vec->rx_sync, start));

                dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
                net_dim(&r_vec->rx_dim, dim_sample);
        }

        if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) {
                struct dim_sample dim_sample = {};
                unsigned int start;
                u64 pkts, bytes;

                do {
                        start = u64_stats_fetch_begin(&r_vec->tx_sync);
                        pkts = r_vec->tx_pkts;
                        bytes = r_vec->tx_bytes;
                } while (u64_stats_fetch_retry(&r_vec->tx_sync, start));

                dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample);
                net_dim(&r_vec->tx_dim, dim_sample);
        }

        return pkts_polled;
}

/* Control device data path
 */

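/* Returns true when the message could not be sent and was put back on
 * r_vec->queue for a later retry, false once the skb has been consumed
 * (either transmitted or dropped on error).
 */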
bool
nfp_nfd3_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
                     struct sk_buff *skb, bool old)
{
        unsigned int real_len = skb->len, meta_len = 0;
        struct nfp_net_tx_ring *tx_ring;
        struct nfp_nfd3_tx_buf *txbuf;
        struct nfp_nfd3_tx_desc *txd;
        struct nfp_net_dp *dp;
        dma_addr_t dma_addr;
        int wr_idx;

        dp = &r_vec->nfp_net->dp;
        tx_ring = r_vec->tx_ring;

        if (WARN_ON_ONCE(skb_shinfo(skb)->nr_frags)) {
                nn_dp_warn(dp, "Driver's CTRL TX does not implement gather\n");
                goto err_free;
        }

        if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
                u64_stats_update_begin(&r_vec->tx_sync);
                r_vec->tx_busy++;
                u64_stats_update_end(&r_vec->tx_sync);
                if (!old)
                        __skb_queue_tail(&r_vec->queue, skb);
                else
                        __skb_queue_head(&r_vec->queue, skb);
                return true;
        }

        if (nfp_app_ctrl_has_meta(nn->app)) {
                if (unlikely(skb_headroom(skb) < 8)) {
                        nn_dp_warn(dp, "CTRL TX on skb without headroom\n");
                        goto err_free;
                }
                meta_len = 8;
                put_unaligned_be32(NFP_META_PORT_ID_CTRL, skb_push(skb, 4));
                put_unaligned_be32(NFP_NET_META_PORTID, skb_push(skb, 4));
        }

        /* Start with the head skbuf */
        dma_addr = dma_map_single(dp->dev, skb->data, skb_headlen(skb),
                                  DMA_TO_DEVICE);
        if (dma_mapping_error(dp->dev, dma_addr))
                goto err_dma_warn;

        wr_idx = D_IDX(tx_ring, tx_ring->wr_p);

        /* Stash the soft descriptor of the head then initialize it */
        txbuf = &tx_ring->txbufs[wr_idx];
        txbuf->skb = skb;
        txbuf->dma_addr = dma_addr;
        txbuf->fidx = -1;
        txbuf->pkt_cnt = 1;
        txbuf->real_len = real_len;

        /* Build TX descriptor */
        txd = &tx_ring->txds[wr_idx];
        txd->offset_eop = meta_len | NFD3_DESC_TX_EOP;
        txd->dma_len = cpu_to_le16(skb_headlen(skb));
        nfp_desc_set_dma_addr_40b(txd, dma_addr);
        txd->data_len = cpu_to_le16(skb->len);

        txd->flags = 0;
        txd->mss = 0;
        txd->lso_hdrlen = 0;

        tx_ring->wr_p++;
        tx_ring->wr_ptr_add++;
        nfp_net_tx_xmit_more_flush(tx_ring);

        return false;

err_dma_warn:
        nn_dp_warn(dp, "Failed to DMA map TX CTRL buffer\n");
err_free:
        u64_stats_update_begin(&r_vec->tx_sync);
        r_vec->tx_errors++;
        u64_stats_update_end(&r_vec->tx_sync);
        dev_kfree_skb_any(skb);
        return false;
}

static void __nfp_ctrl_tx_queued(struct nfp_net_r_vector *r_vec)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue(&r_vec->queue)))
                if (nfp_nfd3_ctrl_tx_one(r_vec->nfp_net, r_vec, skb, true))
                        return;
}

static bool
nfp_ctrl_meta_ok(struct nfp_net *nn, void *data, unsigned int meta_len)
{
        u32 meta_type, meta_tag;

        if (!nfp_app_ctrl_has_meta(nn->app))
                return !meta_len;

        if (meta_len != 8)
                return false;

        meta_type = get_unaligned_be32(data);
        meta_tag = get_unaligned_be32(data + 4);

        return (meta_type == NFP_NET_META_PORTID &&
                meta_tag == NFP_META_PORT_ID_CTRL);
}

static bool
nfp_ctrl_rx_one(struct nfp_net *nn, struct nfp_net_dp *dp,
                struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring)
{
        unsigned int meta_len, data_len, meta_off, pkt_len, pkt_off;
        struct nfp_net_rx_buf *rxbuf;
        struct nfp_net_rx_desc *rxd;
        dma_addr_t new_dma_addr;
        struct sk_buff *skb;
        void *new_frag;
        int idx;

        idx = D_IDX(rx_ring, rx_ring->rd_p);

        rxd = &rx_ring->rxds[idx];
        if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
                return false;

        /* Memory barrier to ensure that we won't do other reads
         * before the DD bit.
         */
        dma_rmb();

        rx_ring->rd_p++;

        rxbuf = &rx_ring->rxbufs[idx];
        meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
        data_len = le16_to_cpu(rxd->rxd.data_len);
        pkt_len = data_len - meta_len;

        pkt_off = NFP_NET_RX_BUF_HEADROOM + dp->rx_dma_off;
        if (dp->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
                pkt_off += meta_len;
        else
                pkt_off += dp->rx_offset;
        meta_off = pkt_off - meta_len;

        /* Stats update */
        u64_stats_update_begin(&r_vec->rx_sync);
        r_vec->rx_pkts++;
        r_vec->rx_bytes += pkt_len;
        u64_stats_update_end(&r_vec->rx_sync);

        nfp_net_dma_sync_cpu_rx(dp, rxbuf->dma_addr + meta_off, data_len);

        if (unlikely(!nfp_ctrl_meta_ok(nn, rxbuf->frag + meta_off, meta_len))) {
                nn_dp_warn(dp, "incorrect metadata for ctrl packet (%d)\n",
                           meta_len);
                nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
                return true;
        }

        skb = build_skb(rxbuf->frag, dp->fl_bufsz);
        if (unlikely(!skb)) {
                nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, NULL);
                return true;
        }
        new_frag = nfp_nfd3_napi_alloc_one(dp, &new_dma_addr);
        if (unlikely(!new_frag)) {
                nfp_nfd3_rx_drop(dp, r_vec, rx_ring, rxbuf, skb);
                return true;
        }

        nfp_net_dma_unmap_rx(dp, rxbuf->dma_addr);

        nfp_nfd3_rx_give_one(dp, rx_ring, new_frag, new_dma_addr);

        skb_reserve(skb, pkt_off);
        skb_put(skb, pkt_len);

        nfp_app_ctrl_rx(nn->app, skb);

        return true;
}

static bool nfp_ctrl_rx(struct nfp_net_r_vector *r_vec)
{
        struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
        struct nfp_net *nn = r_vec->nfp_net;
        struct nfp_net_dp *dp = &nn->dp;
        unsigned int budget = 512;

        while (nfp_ctrl_rx_one(nn, dp, r_vec, rx_ring) && budget--)
                continue;

        return budget;
}

void nfp_nfd3_ctrl_poll(struct tasklet_struct *t)
{
        struct nfp_net_r_vector *r_vec = from_tasklet(r_vec, t, tasklet);

        spin_lock(&r_vec->lock);
        nfp_nfd3_tx_complete(r_vec->tx_ring, 0);
        __nfp_ctrl_tx_queued(r_vec);
        spin_unlock(&r_vec->lock);

        if (nfp_ctrl_rx(r_vec)) {
                nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_entry);
        } else {
                tasklet_schedule(&r_vec->tasklet);
                nn_dp_warn(&r_vec->nfp_net->dp,
                           "control message budget exceeded!\n");
        }
}
1418 | } |