Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
8ceee660 | 2 | /**************************************************************************** |
f7a6d2c4 | 3 | * Driver for Solarflare network controllers and boards |
8ceee660 | 4 | * Copyright 2005-2006 Fen Systems Ltd. |
f7a6d2c4 | 5 | * Copyright 2005-2013 Solarflare Communications Inc. |
8ceee660 BH |
6 | */ |
7 | ||
8 | #include <linux/socket.h> | |
9 | #include <linux/in.h> | |
5a0e3ad6 | 10 | #include <linux/slab.h> |
8ceee660 | 11 | #include <linux/ip.h> |
c47b2d9d | 12 | #include <linux/ipv6.h> |
8ceee660 BH |
13 | #include <linux/tcp.h> |
14 | #include <linux/udp.h> | |
70c71606 | 15 | #include <linux/prefetch.h> |
6eb07caf | 16 | #include <linux/moduleparam.h> |
2768935a | 17 | #include <linux/iommu.h> |
8ceee660 BH |
18 | #include <net/ip.h> |
19 | #include <net/checksum.h> | |
eb9a36be CM |
20 | #include <net/xdp.h> |
21 | #include <linux/bpf_trace.h> | |
8ceee660 | 22 | #include "net_driver.h" |
8ceee660 | 23 | #include "efx.h" |
e1253f39 | 24 | #include "rx_common.h" |
add72477 | 25 | #include "filter.h" |
744093c9 | 26 | #include "nic.h" |
3273c2e8 | 27 | #include "selftest.h" |
8ceee660 BH |
28 | #include "workarounds.h" |
29 | ||
1648a23f DP |
30 | /* Preferred number of descriptors to fill at once */ |
31 | #define EFX_RX_PREFERRED_BATCH 8U | |
8ceee660 | 32 | |
eb9a36be CM |
33 | /* Maximum rx prefix used by any architecture. */ |
34 | #define EFX_MAX_RX_PREFIX_SIZE 16 | |
35 | ||
8ceee660 | 36 | /* Size of buffer allocated for skb header area. */ |
d4ef5b6f | 37 | #define EFX_SKB_HEADERS 128u |
8ceee660 | 38 | |
85740cdf BH |
39 | /* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */ |
40 | #define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \ | |
41 | EFX_RX_USR_BUF_SIZE) | |
42 | ||
4d566063 BH |
43 | static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue, |
44 | struct efx_rx_buffer *rx_buf, | |
97d48a10 | 45 | int len) |
8ceee660 BH |
46 | { |
47 | struct efx_nic *efx = rx_queue->efx; | |
48 | unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding; | |
49 | ||
50 | if (likely(len <= max_len)) | |
51 | return; | |
52 | ||
53 | /* The packet must be discarded, but this is only a fatal error | |
54 | * if the caller indicated it was | |
55 | */ | |
db339569 | 56 | rx_buf->flags |= EFX_RX_PKT_DISCARD; |
8ceee660 | 57 | |
5a6681e2 EC |
58 | if (net_ratelimit()) |
59 | netif_err(efx, rx_err, efx->net_dev, | |
60 | "RX queue %d overlength RX event (%#x > %#x)\n", | |
61 | efx_rx_queue_index(rx_queue), len, max_len); | |
8ceee660 | 62 | |
ba1e8a35 | 63 | efx_rx_queue_channel(rx_queue)->n_rx_overlength++; |
8ceee660 BH |
64 | } |
65 | ||
/* Allocate and construct an SKB around page fragments.
 *
 * Copies up to @hdr_len bytes of headers (plus the hardware RX prefix)
 * into a freshly allocated linear skb, then attaches the remaining page
 * data as fragments.  On success the pages' ownership moves to the skb
 * (each rx_buf->page is cleared); if the whole packet fits in @hdr_len,
 * the page is freed instead.  Returns NULL (and counts a drop) if skb
 * allocation fails; in that case the caller still owns the buffers.
 */
static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
				     struct efx_rx_buffer *rx_buf,
				     unsigned int n_frags,
				     u8 *eh, int hdr_len)
{
	struct efx_nic *efx = channel->efx;
	struct sk_buff *skb;

	/* Allocate an SKB to store the headers */
	skb = netdev_alloc_skb(efx->net_dev,
			       efx->rx_ip_align + efx->rx_prefix_size +
			       hdr_len);
	if (unlikely(skb == NULL)) {
		atomic_inc(&efx->n_rx_noskb_drops);
		return NULL;
	}

	EFX_WARN_ON_ONCE_PARANOID(rx_buf->len < hdr_len);

	/* Copy the RX prefix together with the headers, then hide the
	 * prefix again with skb_reserve() so that skb->data points at
	 * the Ethernet header with the IP header suitably aligned.
	 */
	memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size,
	       efx->rx_prefix_size + hdr_len);
	skb_reserve(skb, efx->rx_ip_align + efx->rx_prefix_size);
	__skb_put(skb, hdr_len);

	/* Append the remaining page(s) onto the frag list */
	if (rx_buf->len > hdr_len) {
		/* Skip past the bytes already copied into the linear area */
		rx_buf->page_offset += hdr_len;
		rx_buf->len -= hdr_len;

		for (;;) {
			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
					rx_buf->page, rx_buf->page_offset,
					rx_buf->len, efx->rx_buffer_truesize);
			/* Page reference now owned by the skb */
			rx_buf->page = NULL;

			if (skb_shinfo(skb)->nr_frags == n_frags)
				break;

			rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
		}
	} else {
		/* Whole packet fitted in the linear area; release the page */
		__free_pages(rx_buf->page, efx->rx_buffer_order);
		rx_buf->page = NULL;
		n_frags = 0;
	}

	/* Move past the ethernet header */
	skb->protocol = eth_type_trans(skb, efx->net_dev);

	skb_mark_napi_id(skb, &channel->napi_str);

	return skb;
}
120 | ||
/* Handle a received packet, first half: validate the completion event,
 * sync the DMA mappings, recycle the pages, and stash the packet on the
 * channel so that __efx_rx_packet() can process the payload later.
 * @index/@n_frags identify the buffer run in @rx_queue; @len is the
 * completed length (or 0 when taken from the RX prefix); @flags carries
 * EFX_RX_PKT_* bits from the event.
 */
void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
		   unsigned int n_frags, unsigned int len, u16 flags)
{
	struct efx_nic *efx = rx_queue->efx;
	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
	struct efx_rx_buffer *rx_buf;

	rx_queue->rx_packets++;

	rx_buf = efx_rx_buffer(rx_queue, index);
	rx_buf->flags |= flags;

	/* Validate the number of fragments and completed length */
	if (n_frags == 1) {
		if (!(flags & EFX_RX_PKT_PREFIX_LEN))
			efx_rx_packet__check_len(rx_queue, rx_buf, len);
	} else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) ||
		   unlikely(len <= (n_frags - 1) * efx->rx_dma_len) ||
		   unlikely(len > n_frags * efx->rx_dma_len) ||
		   unlikely(!efx->rx_scatter)) {
		/* If this isn't an explicit discard request, either
		 * the hardware or the driver is broken.
		 */
		WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD));
		rx_buf->flags |= EFX_RX_PKT_DISCARD;
	}

	netif_vdbg(efx, rx_status, efx->net_dev,
		   "RX queue %d received ids %x-%x len %d %s%s\n",
		   efx_rx_queue_index(rx_queue), index,
		   (index + n_frags - 1) & rx_queue->ptr_mask, len,
		   (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
		   (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : "");

	/* Discard packet, if instructed to do so.  Process the
	 * previous receive first.
	 */
	if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
		efx_rx_flush_packet(channel);
		efx_discard_rx_packet(channel, rx_buf, n_frags);
		return;
	}

	/* For a single fragment without a prefix, the event length is
	 * authoritative; otherwise rx_buf->len is fixed up below / later.
	 */
	if (n_frags == 1 && !(flags & EFX_RX_PKT_PREFIX_LEN))
		rx_buf->len = len;

	/* Release and/or sync the DMA mapping - assumes all RX buffers
	 * consumed in-order per RX queue.
	 */
	efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);

	/* Prefetch nice and early so data will (hopefully) be in cache by
	 * the time we look at it.
	 */
	prefetch(efx_rx_buf_va(rx_buf));

	/* Advance past the hardware RX prefix so len/offset describe the
	 * packet itself.
	 */
	rx_buf->page_offset += efx->rx_prefix_size;
	rx_buf->len -= efx->rx_prefix_size;

	if (n_frags > 1) {
		/* Release/sync DMA mapping for additional fragments.
		 * Fix length for last fragment.
		 */
		unsigned int tail_frags = n_frags - 1;

		for (;;) {
			rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
			if (--tail_frags == 0)
				break;
			efx_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len);
		}
		/* The final fragment holds whatever is left of @len */
		rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len;
		efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
	}

	/* All fragments have been DMA-synced, so recycle pages. */
	rx_buf = efx_rx_buffer(rx_queue, index);
	efx_recycle_rx_pages(channel, rx_buf, n_frags);

	/* Pipeline receives so that we give time for packet headers to be
	 * prefetched into cache.
	 */
	efx_rx_flush_packet(channel);
	channel->rx_pkt_n_frags = n_frags;
	channel->rx_pkt_index = index;
}
207 | ||
97d48a10 | 208 | static void efx_rx_deliver(struct efx_channel *channel, u8 *eh, |
85740cdf BH |
209 | struct efx_rx_buffer *rx_buf, |
210 | unsigned int n_frags) | |
1ddceb4c BH |
211 | { |
212 | struct sk_buff *skb; | |
97d48a10 | 213 | u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS); |
1ddceb4c | 214 | |
85740cdf | 215 | skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len); |
97d48a10 | 216 | if (unlikely(skb == NULL)) { |
9eb0a5d1 DP |
217 | struct efx_rx_queue *rx_queue; |
218 | ||
219 | rx_queue = efx_channel_get_rx_queue(channel); | |
220 | efx_free_rx_buffers(rx_queue, rx_buf, n_frags); | |
97d48a10 AR |
221 | return; |
222 | } | |
223 | skb_record_rx_queue(skb, channel->rx_queue.core_index); | |
1ddceb4c BH |
224 | |
225 | /* Set the SKB flags */ | |
226 | skb_checksum_none_assert(skb); | |
da50ae2e | 227 | if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED)) { |
c99dffc4 | 228 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
da50ae2e JC |
229 | skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL); |
230 | } | |
1ddceb4c | 231 | |
bd9a265d JC |
232 | efx_rx_skb_attach_timestamp(channel, skb); |
233 | ||
c31e5f9f | 234 | if (channel->type->receive_skb) |
4a74dc65 | 235 | if (channel->type->receive_skb(channel, skb)) |
97d48a10 | 236 | return; |
4a74dc65 BH |
237 | |
238 | /* Pass the packet up */ | |
e090bfb9 EC |
239 | if (channel->rx_list != NULL) |
240 | /* Add to list, will pass up later */ | |
241 | list_add_tail(&skb->list, channel->rx_list); | |
242 | else | |
243 | /* No list, so pass it up now */ | |
244 | netif_receive_skb(skb); | |
1ddceb4c BH |
245 | } |
246 | ||
eb9a36be CM |
247 | /** efx_do_xdp: perform XDP processing on a received packet |
248 | * | |
249 | * Returns true if packet should still be delivered. | |
250 | */ | |
251 | static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel, | |
252 | struct efx_rx_buffer *rx_buf, u8 **ehp) | |
253 | { | |
254 | u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE]; | |
255 | struct efx_rx_queue *rx_queue; | |
256 | struct bpf_prog *xdp_prog; | |
dfe44c1f | 257 | struct xdp_frame *xdpf; |
eb9a36be CM |
258 | struct xdp_buff xdp; |
259 | u32 xdp_act; | |
260 | s16 offset; | |
261 | int err; | |
262 | ||
4eb14e3f THJ |
263 | xdp_prog = rcu_dereference_bh(efx->xdp_prog); |
264 | if (!xdp_prog) | |
eb9a36be | 265 | return true; |
eb9a36be CM |
266 | |
267 | rx_queue = efx_channel_get_rx_queue(channel); | |
268 | ||
269 | if (unlikely(channel->rx_pkt_n_frags > 1)) { | |
270 | /* We can't do XDP on fragmented packets - drop. */ | |
eb9a36be CM |
271 | efx_free_rx_buffers(rx_queue, rx_buf, |
272 | channel->rx_pkt_n_frags); | |
273 | if (net_ratelimit()) | |
274 | netif_err(efx, rx_err, efx->net_dev, | |
275 | "XDP is not possible with multiple receive fragments (%d)\n", | |
276 | channel->rx_pkt_n_frags); | |
cd846bef | 277 | channel->n_rx_xdp_bad_drops++; |
eb9a36be CM |
278 | return false; |
279 | } | |
280 | ||
281 | dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, | |
282 | rx_buf->len, DMA_FROM_DEVICE); | |
283 | ||
284 | /* Save the rx prefix. */ | |
285 | EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE); | |
286 | memcpy(rx_prefix, *ehp - efx->rx_prefix_size, | |
287 | efx->rx_prefix_size); | |
288 | ||
43b5169d | 289 | xdp_init_buff(&xdp, efx->rx_page_buf_step, &rx_queue->xdp_rxq_info); |
eb9a36be | 290 | /* No support yet for XDP metadata */ |
be9df4af LB |
291 | xdp_prepare_buff(&xdp, *ehp - EFX_XDP_HEADROOM, EFX_XDP_HEADROOM, |
292 | rx_buf->len, false); | |
eb9a36be CM |
293 | |
294 | xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp); | |
eb9a36be CM |
295 | |
296 | offset = (u8 *)xdp.data - *ehp; | |
297 | ||
298 | switch (xdp_act) { | |
299 | case XDP_PASS: | |
300 | /* Fix up rx prefix. */ | |
301 | if (offset) { | |
302 | *ehp += offset; | |
303 | rx_buf->page_offset += offset; | |
304 | rx_buf->len -= offset; | |
305 | memcpy(*ehp - efx->rx_prefix_size, rx_prefix, | |
306 | efx->rx_prefix_size); | |
307 | } | |
308 | break; | |
309 | ||
310 | case XDP_TX: | |
dfe44c1f | 311 | /* Buffer ownership passes to tx on success. */ |
1b698fa5 | 312 | xdpf = xdp_convert_buff_to_frame(&xdp); |
dfe44c1f CM |
313 | err = efx_xdp_tx_buffers(efx, 1, &xdpf, true); |
314 | if (unlikely(err != 1)) { | |
315 | efx_free_rx_buffers(rx_queue, rx_buf, 1); | |
316 | if (net_ratelimit()) | |
317 | netif_err(efx, rx_err, efx->net_dev, | |
318 | "XDP TX failed (%d)\n", err); | |
cd846bef | 319 | channel->n_rx_xdp_bad_drops++; |
9440a875 | 320 | trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); |
cd846bef CM |
321 | } else { |
322 | channel->n_rx_xdp_tx++; | |
dfe44c1f CM |
323 | } |
324 | break; | |
eb9a36be CM |
325 | |
326 | case XDP_REDIRECT: | |
327 | err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog); | |
328 | if (unlikely(err)) { | |
329 | efx_free_rx_buffers(rx_queue, rx_buf, 1); | |
330 | if (net_ratelimit()) | |
331 | netif_err(efx, rx_err, efx->net_dev, | |
332 | "XDP redirect failed (%d)\n", err); | |
cd846bef | 333 | channel->n_rx_xdp_bad_drops++; |
9440a875 | 334 | trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); |
cd846bef CM |
335 | } else { |
336 | channel->n_rx_xdp_redirect++; | |
eb9a36be CM |
337 | } |
338 | break; | |
339 | ||
340 | default: | |
c8064e5b | 341 | bpf_warn_invalid_xdp_action(efx->net_dev, xdp_prog, xdp_act); |
eb9a36be | 342 | efx_free_rx_buffers(rx_queue, rx_buf, 1); |
cd846bef | 343 | channel->n_rx_xdp_bad_drops++; |
9440a875 | 344 | trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); |
eb9a36be CM |
345 | break; |
346 | ||
347 | case XDP_ABORTED: | |
348 | trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act); | |
df561f66 | 349 | fallthrough; |
eb9a36be CM |
350 | case XDP_DROP: |
351 | efx_free_rx_buffers(rx_queue, rx_buf, 1); | |
cd846bef | 352 | channel->n_rx_xdp_drops++; |
eb9a36be CM |
353 | break; |
354 | } | |
355 | ||
356 | return xdp_act == XDP_PASS; | |
357 | } | |
358 | ||
/* Handle a received packet.  Second half: Touches packet payload.
 * Runs after efx_rx_packet() has validated and synced the buffers;
 * dispatches the stashed packet to the loopback selftest, XDP, GRO or
 * the plain skb delivery path, then clears the pipeline slot.
 */
void __efx_rx_packet(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	struct efx_rx_buffer *rx_buf =
		efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
	u8 *eh = efx_rx_buf_va(rx_buf);

	/* Read length from the prefix if necessary.  This already
	 * excludes the length of the prefix itself.
	 */
	if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN)
		rx_buf->len = le16_to_cpup((__le16 *)
					   (eh + efx->rx_packet_len_offset));

	/* If we're in loopback test, then pass the packet directly to the
	 * loopback layer, and free the rx_buf here
	 */
	if (unlikely(efx->loopback_selftest)) {
		struct efx_rx_queue *rx_queue;

		efx_loopback_rx_packet(efx, eh, rx_buf->len);
		rx_queue = efx_channel_get_rx_queue(channel);
		efx_free_rx_buffers(rx_queue, rx_buf,
				    channel->rx_pkt_n_frags);
		goto out;
	}

	/* XDP may consume the packet (TX/redirect/drop); only continue
	 * when it returns PASS.  eh may be adjusted by the program.
	 */
	if (!efx_do_xdp(efx, channel, rx_buf, &eh))
		goto out;

	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
		rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;

	/* TCP packets go via GRO unless a channel hook wants raw skbs */
	if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
		efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh, 0);
	else
		efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
out:
	channel->rx_pkt_n_frags = 0;
}