// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2018 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include "net_driver.h"
#include "efx.h"
#include "nic_common.h"
#include "tx_common.h"

static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
{
	return DIV_ROUND_UP(tx_queue->ptr_mask + 1,
			    PAGE_SIZE >> EFX_TX_CB_ORDER);
}
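
/* Sizing example (illustrative, assuming EFX_TX_CB_ORDER is 7): each copy
 * buffer is then 128 bytes, so a 4 KiB page holds
 * PAGE_SIZE >> EFX_TX_CB_ORDER = 32 of them, and a ring with 1024 entries
 * (ptr_mask = 1023) needs DIV_ROUND_UP(1024, 32) = 32 copy-buffer pages.
 */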

int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int entries;
	int rc;

	/* Create the smallest power-of-two aligned ring */
	entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
	tx_queue->ptr_mask = entries - 1;

	netif_dbg(efx, probe, efx->net_dev,
		  "creating TX queue %d size %#x mask %#x\n",
		  tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);

	/* Allocate software ring */
	tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
				   GFP_KERNEL);
	if (!tx_queue->buffer)
		return -ENOMEM;

	tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
				    sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
	if (!tx_queue->cb_page) {
		rc = -ENOMEM;
		goto fail1;
	}

	/* Allocate hardware ring, determine TXQ type */
	rc = efx_nic_probe_tx(tx_queue);
	if (rc)
		goto fail2;

	tx_queue->channel->tx_queue_by_type[tx_queue->type] = tx_queue;
	return 0;

fail2:
	kfree(tx_queue->cb_page);
	tx_queue->cb_page = NULL;
fail1:
	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
	return rc;
}
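
/* Sizing example (illustrative, assuming EFX_MIN_DMAQ_SIZE is 512): a
 * requested txq_entries of 700 rounds up to 1024, which already exceeds
 * the minimum, so the ring is created with 1024 entries and
 * ptr_mask = 1023.
 */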

void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_nic *efx = tx_queue->efx;

	netif_dbg(efx, drv, efx->net_dev,
		  "initialising TX queue %d\n", tx_queue->queue);

	tx_queue->insert_count = 0;
	tx_queue->notify_count = 0;
	tx_queue->write_count = 0;
	tx_queue->packet_write_count = 0;
	tx_queue->old_write_count = 0;
	tx_queue->read_count = 0;
	tx_queue->old_read_count = 0;
	tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
	tx_queue->xmit_pending = false;
	tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
				  tx_queue->channel == efx_ptp_channel(efx));
	tx_queue->completed_timestamp_major = 0;
	tx_queue->completed_timestamp_minor = 0;

	tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
	tx_queue->tso_version = 0;

	/* Set up TX descriptor ring */
	efx_nic_init_tx(tx_queue);

	tx_queue->initialised = true;
}
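
/* The counters reset above form the queue's producer/consumer state:
 * insert_count tracks buffers filled in by the xmit path, write_count
 * tracks descriptors handed to the NIC, and read_count tracks completions.
 * The queue is empty when read_count has caught up with write_count,
 * which is what the EFX_EMPTY_COUNT_VALID marker in empty_read_count
 * records.
 */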

void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
{
	struct efx_tx_buffer *buffer;

	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "shutting down TX queue %d\n", tx_queue->queue);

	tx_queue->initialised = false;

	if (!tx_queue->buffer)
		return;

	/* Free any buffers left in the ring */
	while (tx_queue->read_count != tx_queue->write_count) {
		unsigned int pkts_compl = 0, bytes_compl = 0;
		unsigned int efv_pkts_compl = 0;

		buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];

		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
				   &efv_pkts_compl);

		++tx_queue->read_count;
	}
	tx_queue->xmit_pending = false;
	netdev_tx_reset_queue(tx_queue->core_txq);
}

void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
{
	int i;

	if (!tx_queue->buffer)
		return;

	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
		  "destroying TX queue %d\n", tx_queue->queue);
	efx_nic_remove_tx(tx_queue);

	if (tx_queue->cb_page) {
		for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
			efx_nic_free_buffer(tx_queue->efx,
					    &tx_queue->cb_page[i]);
		kfree(tx_queue->cb_page);
		tx_queue->cb_page = NULL;
	}

	kfree(tx_queue->buffer);
	tx_queue->buffer = NULL;
	tx_queue->channel->tx_queue_by_type[tx_queue->type] = NULL;
}

void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
			struct efx_tx_buffer *buffer,
			unsigned int *pkts_compl,
			unsigned int *bytes_compl,
			unsigned int *efv_pkts_compl)
{
	if (buffer->unmap_len) {
		struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
		dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;

		if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
			dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
					 DMA_TO_DEVICE);
		else
			dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
				       DMA_TO_DEVICE);
		buffer->unmap_len = 0;
	}

	if (buffer->flags & EFX_TX_BUF_SKB) {
		struct sk_buff *skb = (struct sk_buff *)buffer->skb;

		if (unlikely(buffer->flags & EFX_TX_BUF_EFV)) {
			EFX_WARN_ON_PARANOID(!efv_pkts_compl);
			(*efv_pkts_compl)++;
		} else {
			EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
			(*pkts_compl)++;
			(*bytes_compl) += skb->len;
		}

		if (tx_queue->timestamping &&
		    (tx_queue->completed_timestamp_major ||
		     tx_queue->completed_timestamp_minor)) {
			struct skb_shared_hwtstamps hwtstamp;

			hwtstamp.hwtstamp =
				efx_ptp_nic_to_kernel_time(tx_queue);
			skb_tstamp_tx(skb, &hwtstamp);

			tx_queue->completed_timestamp_major = 0;
			tx_queue->completed_timestamp_minor = 0;
		}
		dev_consume_skb_any((struct sk_buff *)buffer->skb);
		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
			   "TX queue %d transmission id %x complete\n",
			   tx_queue->queue, tx_queue->read_count);
	} else if (buffer->flags & EFX_TX_BUF_XDP) {
		xdp_return_frame_rx_napi(buffer->xdpf);
	}

	buffer->len = 0;
	buffer->flags = 0;
}
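
/* Summary of the cases above: EFX_TX_BUF_SKB buffers complete an skb
 * (counted as a representor packet when EFX_TX_BUF_EFV is set, otherwise
 * as a normal packet/byte completion, with an optional hardware TX
 * timestamp), EFX_TX_BUF_XDP buffers hand the frame back via
 * xdp_return_frame_rx_napi(), and plain continuation buffers only need
 * their length and flags cleared.
 */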

/* Remove packets from the TX queue
 *
 * This removes packets from the TX queue, up to and including the
 * specified index.
 */
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
				unsigned int index,
				unsigned int *pkts_compl,
				unsigned int *bytes_compl,
				unsigned int *efv_pkts_compl)
{
	struct efx_nic *efx = tx_queue->efx;
	unsigned int stop_index, read_ptr;

	stop_index = (index + 1) & tx_queue->ptr_mask;
	read_ptr = tx_queue->read_count & tx_queue->ptr_mask;

	while (read_ptr != stop_index) {
		struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];

		if (!efx_tx_buffer_in_use(buffer)) {
			netif_err(efx, tx_err, efx->net_dev,
				  "TX queue %d spurious TX completion id %d\n",
				  tx_queue->queue, read_ptr);
			efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
			return;
		}

		efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl,
				   efv_pkts_compl);

		++tx_queue->read_count;
		read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
	}
}

void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue)
{
	if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
		tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
		if (tx_queue->read_count == tx_queue->old_write_count) {
			/* Ensure that read_count is flushed. */
			smp_mb();
			tx_queue->empty_read_count =
				tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
		}
	}
}
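
/* Once the queue drains, the snapshot above leaves read_count tagged with
 * EFX_EMPTY_COUNT_VALID in empty_read_count; the transmit path can then
 * tell cheaply that the hardware ring was empty (for example when deciding
 * whether descriptors may be pushed with the doorbell write).  The
 * smp_mb() orders the read_count update against that later check.
 */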

void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
	unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
	unsigned int efv_pkts_compl = 0;
	struct efx_nic *efx = tx_queue->efx;

	EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);

	efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl,
			    &efv_pkts_compl);
	tx_queue->pkts_compl += pkts_compl;
	tx_queue->bytes_compl += bytes_compl;

	if (pkts_compl + efv_pkts_compl > 1)
		++tx_queue->merge_events;

	/* See if we need to restart the netif queue. This memory
	 * barrier ensures that we write read_count (inside
	 * efx_dequeue_buffers()) before reading the queue status.
	 */
	smp_mb();
	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
	    likely(efx->port_enabled) &&
	    likely(netif_device_present(efx->net_dev))) {
		fill_level = efx_channel_tx_fill_level(tx_queue->channel);
		if (fill_level <= efx->txq_wake_thresh)
			netif_tx_wake_queue(tx_queue->core_txq);
	}

	efx_xmit_done_check_empty(tx_queue);
}

/* Remove buffers put into a tx_queue for the current packet.
 * None of the buffers must have an skb attached.
 */
void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
			unsigned int insert_count)
{
	unsigned int efv_pkts_compl = 0;
	struct efx_tx_buffer *buffer;
	unsigned int bytes_compl = 0;
	unsigned int pkts_compl = 0;

	/* Work backwards until we hit the original insert pointer value */
	while (tx_queue->insert_count != insert_count) {
		--tx_queue->insert_count;
		buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
				   &efv_pkts_compl);
	}
}

struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
				       dma_addr_t dma_addr, size_t len)
{
	const struct efx_nic_type *nic_type = tx_queue->efx->type;
	struct efx_tx_buffer *buffer;
	unsigned int dma_len;

	/* Map the fragment taking account of NIC-dependent DMA limits. */
	do {
		buffer = efx_tx_queue_get_insert_buffer(tx_queue);

		if (nic_type->tx_limit_len)
			dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
		else
			dma_len = len;

		buffer->len = dma_len;
		buffer->dma_addr = dma_addr;
		buffer->flags = EFX_TX_BUF_CONT;
		len -= dma_len;
		dma_addr += dma_len;
		++tx_queue->insert_count;
	} while (len);

	return buffer;
}
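
/* Splitting example (hypothetical limit): if tx_limit_len() capped each
 * descriptor at 4096 bytes, a 9000-byte fragment would be emitted as
 * three descriptors of 4096, 4096 and 808 bytes.  Every buffer is marked
 * EFX_TX_BUF_CONT here; the caller rewrites the flags on the final buffer
 * of the packet.
 */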

int efx_tx_tso_header_length(struct sk_buff *skb)
{
	size_t header_len;

	if (skb->encapsulation)
		header_len = skb_inner_transport_header(skb) -
				skb->data +
				(inner_tcp_hdr(skb)->doff << 2u);
	else
		header_len = skb_transport_header(skb) - skb->data +
				(tcp_hdr(skb)->doff << 2u);
	return header_len;
}
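
/* tcp_hdr(skb)->doff counts 32-bit words, so doff << 2 is the TCP header
 * length in bytes (e.g. doff = 5 gives the minimal 20-byte header).  The
 * returned length therefore spans everything from skb->data up to and
 * including the end of the (possibly inner) TCP header.
 */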

/* Map all data from an SKB for DMA and create descriptors on the queue. */
int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
		    unsigned int segment_count)
{
	struct efx_nic *efx = tx_queue->efx;
	struct device *dma_dev = &efx->pci_dev->dev;
	unsigned int frag_index, nr_frags;
	dma_addr_t dma_addr, unmap_addr;
	unsigned short dma_flags;
	size_t len, unmap_len;

	nr_frags = skb_shinfo(skb)->nr_frags;
	frag_index = 0;

	/* Map header data. */
	len = skb_headlen(skb);
	dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
	dma_flags = EFX_TX_BUF_MAP_SINGLE;
	unmap_len = len;
	unmap_addr = dma_addr;

	if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
		return -EIO;

	if (segment_count) {
		/* For TSO we need to put the header into a separate
		 * descriptor. Map this separately if necessary.
		 */
		size_t header_len = efx_tx_tso_header_length(skb);

		if (header_len != len) {
			tx_queue->tso_long_headers++;
			efx_tx_map_chunk(tx_queue, dma_addr, header_len);
			len -= header_len;
			dma_addr += header_len;
		}
	}

	/* Add descriptors for each fragment. */
	do {
		struct efx_tx_buffer *buffer;
		skb_frag_t *fragment;

		buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);

		/* The final descriptor for a fragment is responsible for
		 * unmapping the whole fragment.
		 */
		buffer->flags = EFX_TX_BUF_CONT | dma_flags;
		buffer->unmap_len = unmap_len;
		buffer->dma_offset = buffer->dma_addr - unmap_addr;

		if (frag_index >= nr_frags) {
			/* Store SKB details with the final buffer for
			 * the completion.
			 */
			buffer->skb = skb;
			buffer->flags = EFX_TX_BUF_SKB | dma_flags;
			return 0;
		}

		/* Move on to the next fragment. */
		fragment = &skb_shinfo(skb)->frags[frag_index++];
		len = skb_frag_size(fragment);
		dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
					    DMA_TO_DEVICE);
		dma_flags = 0;
		unmap_len = len;
		unmap_addr = dma_addr;

		if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
			return -EIO;
	} while (1);
}

unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
{
	/* Header and payload descriptor for each output segment, plus
	 * one for every input fragment boundary within a segment
	 */
	unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;

	/* Possibly one more per segment for option descriptors */
	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
		max_descs += EFX_TSO_MAX_SEGS;

	/* Possibly more for PCIe page boundaries within input fragments */
	if (PAGE_SIZE > EFX_PAGE_SIZE)
		max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
				   DIV_ROUND_UP(GSO_LEGACY_MAX_SIZE,
						EFX_PAGE_SIZE));

	return max_descs;
}
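
/* Worked example (illustrative, assuming EFX_TSO_MAX_SEGS is 100 and
 * MAX_SKB_FRAGS is 17): the baseline is 100 * 2 + 17 = 217 descriptors,
 * rising to 317 on NICs at or above EFX_REV_HUNT_A0 that may need an
 * option descriptor per segment, before any extra descriptors for PCIe
 * page boundaries within input fragments.
 */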

/*
 * Fallback to software TSO.
 *
 * This is used if we are unable to send a GSO packet through hardware TSO.
 * This should only ever happen due to per-queue restrictions - unsupported
 * packets should first be filtered by the feature flags.
 *
 * Returns 0 on success, error code otherwise.
 */
int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
{
	struct sk_buff *segments, *next;

	segments = skb_gso_segment(skb, 0);
	if (IS_ERR(segments))
		return PTR_ERR(segments);

	dev_consume_skb_any(skb);

	skb_list_walk_safe(segments, skb, next) {
		skb_mark_not_on_list(skb);
		efx_enqueue_skb(tx_queue, skb);
	}

	return 0;
}