drivers/net/ethernet/intel/i40e/i40e_txrx.c (linux-2.6-block.git)
1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2016 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26
27 #include <linux/prefetch.h>
28 #include <net/busy_poll.h>
29 #include "i40e.h"
30 #include "i40e_prototype.h"
31
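/* build_ctob - assemble a Tx data descriptor's cmd_type_offset_bsz quadword
 * from the command flags, header offsets, buffer size and L2 tag, with the
 * DATA descriptor type already set.
 */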
32 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
33                                 u32 td_tag)
34 {
35         return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
36                            ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
37                            ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
38                            ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
39                            ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
40 }
41
42 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
43 #define I40E_FD_CLEAN_DELAY 10
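/* I40E_TXD_CMD marks the final descriptor of a frame (EOP) and requests a
 * completion write-back (RS); I40E_FD_CLEAN_DELAY is the number of 1 ms
 * waits i40e_program_fdir_filter() allows while waiting for free descriptors.
 */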
44 /**
45  * i40e_program_fdir_filter - Program a Flow Director filter
46  * @fdir_data: the flow director filter parameters to program into the descriptor
47  * @raw_packet: the pre-allocated packet buffer for FDir
48  * @pf: The PF pointer
49  * @add: True for add/update, False for remove
50  **/
51 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
52                              struct i40e_pf *pf, bool add)
53 {
54         struct i40e_filter_program_desc *fdir_desc;
55         struct i40e_tx_buffer *tx_buf, *first;
56         struct i40e_tx_desc *tx_desc;
57         struct i40e_ring *tx_ring;
58         unsigned int fpt, dcc;
59         struct i40e_vsi *vsi;
60         struct device *dev;
61         dma_addr_t dma;
62         u32 td_cmd = 0;
63         u16 delay = 0;
64         u16 i;
65
66         /* find existing FDIR VSI */
67         vsi = NULL;
68         for (i = 0; i < pf->num_alloc_vsi; i++)
69                 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
70                         vsi = pf->vsi[i];
71         if (!vsi)
72                 return -ENOENT;
73
74         tx_ring = vsi->tx_rings[0];
75         dev = tx_ring->dev;
76
77         /* we need two descriptors to add/del a filter and we can wait */
78         do {
79                 if (I40E_DESC_UNUSED(tx_ring) > 1)
80                         break;
81                 msleep_interruptible(1);
82                 delay++;
83         } while (delay < I40E_FD_CLEAN_DELAY);
84
85         if (!(I40E_DESC_UNUSED(tx_ring) > 1))
86                 return -EAGAIN;
87
88         dma = dma_map_single(dev, raw_packet,
89                              I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
90         if (dma_mapping_error(dev, dma))
91                 goto dma_fail;
92
93         /* grab the next descriptor */
94         i = tx_ring->next_to_use;
95         fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
96         first = &tx_ring->tx_bi[i];
97         memset(first, 0, sizeof(struct i40e_tx_buffer));
98
99         tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
100
101         fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
102               I40E_TXD_FLTR_QW0_QINDEX_MASK;
103
104         fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
105                I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
106
107         fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
108                I40E_TXD_FLTR_QW0_PCTYPE_MASK;
109
110         /* Use LAN VSI Id if not programmed by user */
111         if (fdir_data->dest_vsi == 0)
112                 fpt |= (pf->vsi[pf->lan_vsi]->id) <<
113                        I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
114         else
115                 fpt |= ((u32)fdir_data->dest_vsi <<
116                         I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
117                        I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
118
119         dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
120
121         if (add)
122                 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
123                        I40E_TXD_FLTR_QW1_PCMD_SHIFT;
124         else
125                 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
126                        I40E_TXD_FLTR_QW1_PCMD_SHIFT;
127
128         dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
129                I40E_TXD_FLTR_QW1_DEST_MASK;
130
131         dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
132                I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
133
134         if (fdir_data->cnt_index != 0) {
135                 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
136                 dcc |= ((u32)fdir_data->cnt_index <<
137                         I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
138                         I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
139         }
140
141         fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
142         fdir_desc->rsvd = cpu_to_le32(0);
143         fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
144         fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
145
146         /* Now program a dummy descriptor */
147         i = tx_ring->next_to_use;
148         tx_desc = I40E_TX_DESC(tx_ring, i);
149         tx_buf = &tx_ring->tx_bi[i];
150
151         tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
152
153         memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
154
155         /* record length, and DMA address */
156         dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
157         dma_unmap_addr_set(tx_buf, dma, dma);
158
159         tx_desc->buffer_addr = cpu_to_le64(dma);
160         td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
161
162         tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
163         tx_buf->raw_buf = (void *)raw_packet;
164
165         tx_desc->cmd_type_offset_bsz =
166                 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
167
168         /* Force memory writes to complete before letting h/w
169          * know there are new descriptors to fetch.
170          */
171         wmb();
172
173         /* Mark the data descriptor to be watched */
174         first->next_to_watch = tx_desc;
175
176         writel(tx_ring->next_to_use, tx_ring->tail);
177         return 0;
178
179 dma_fail:
180         return -1;
181 }
182
183 #define IP_HEADER_OFFSET 14
184 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
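/* IP_HEADER_OFFSET is the Ethernet header length, so the IPv4 header of the
 * dummy frames below starts at byte 14; 42 = 14 (Ethernet) + 20 (IPv4) +
 * 8 (UDP), i.e. a headers-only UDP frame.
 */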
185 /**
186  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
187  * @vsi: pointer to the targeted VSI
188  * @fd_data: the flow director data required for the FDir descriptor
189  * @add: true adds a filter, false removes it
190  *
191  * Returns 0 if the filters were successfully added or removed
192  **/
193 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
194                                    struct i40e_fdir_filter *fd_data,
195                                    bool add)
196 {
197         struct i40e_pf *pf = vsi->back;
198         struct udphdr *udp;
199         struct iphdr *ip;
200         bool err = false;
201         u8 *raw_packet;
202         int ret;
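        /* Headers-only UDP/IPv4 dummy frame: ethertype 0x0800 (IPv4),
         * IPv4 version 4/IHL 5, total length 0x1c (28 = 20 IPv4 + 8 UDP),
         * DF set, TTL 0x40, protocol 0x11 (UDP); addresses and ports are
         * filled in from fd_data below.
         */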
203         static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
204                 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
205                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
206
207         raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
208         if (!raw_packet)
209                 return -ENOMEM;
210         memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
211
212         ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
213         udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
214               + sizeof(struct iphdr));
215
216         ip->daddr = fd_data->dst_ip[0];
217         udp->dest = fd_data->dst_port;
218         ip->saddr = fd_data->src_ip[0];
219         udp->source = fd_data->src_port;
220
221         fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
222         ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
223         if (ret) {
224                 dev_info(&pf->pdev->dev,
225                          "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
226                          fd_data->pctype, fd_data->fd_id, ret);
227                 err = true;
228         } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
229                 if (add)
230                         dev_info(&pf->pdev->dev,
231                                  "Filter OK for PCTYPE %d loc = %d\n",
232                                  fd_data->pctype, fd_data->fd_id);
233                 else
234                         dev_info(&pf->pdev->dev,
235                                  "Filter deleted for PCTYPE %d loc = %d\n",
236                                  fd_data->pctype, fd_data->fd_id);
237         }
238         if (err)
239                 kfree(raw_packet);
240
241         return err ? -EOPNOTSUPP : 0;
242 }
243
244 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
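/* 54 = 14 (Ethernet) + 20 (IPv4) + 20 (TCP), i.e. a headers-only TCP frame */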
245 /**
246  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
247  * @vsi: pointer to the targeted VSI
248  * @fd_data: the flow director data required for the FDir descriptor
249  * @add: true adds a filter, false removes it
250  *
251  * Returns 0 if the filters were successfully added or removed
252  **/
253 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
254                                    struct i40e_fdir_filter *fd_data,
255                                    bool add)
256 {
257         struct i40e_pf *pf = vsi->back;
258         struct tcphdr *tcp;
259         struct iphdr *ip;
260         bool err = false;
261         u8 *raw_packet;
262         int ret;
263         /* Dummy packet */
264         static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
265                 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
266                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
267                 0x0, 0x72, 0, 0, 0, 0};
268
269         raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
270         if (!raw_packet)
271                 return -ENOMEM;
272         memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
273
274         ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
275         tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
276               + sizeof(struct iphdr));
277
278         ip->daddr = fd_data->dst_ip[0];
279         tcp->dest = fd_data->dst_port;
280         ip->saddr = fd_data->src_ip[0];
281         tcp->source = fd_data->src_port;
282
283         if (add) {
284                 pf->fd_tcp_rule++;
285                 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
286                         if (I40E_DEBUG_FD & pf->hw.debug_mask)
287                                 dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
288                         pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
289                 }
290         } else {
291                 pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ?
292                                   (pf->fd_tcp_rule - 1) : 0;
293                 if (pf->fd_tcp_rule == 0) {
294                         pf->flags |= I40E_FLAG_FD_ATR_ENABLED;
295                         if (I40E_DEBUG_FD & pf->hw.debug_mask)
296                                 dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n");
297                 }
298         }
299
300         fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
301         ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
302
303         if (ret) {
304                 dev_info(&pf->pdev->dev,
305                          "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
306                          fd_data->pctype, fd_data->fd_id, ret);
307                 err = true;
308         } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
309                 if (add)
310                         dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d\n",
311                                  fd_data->pctype, fd_data->fd_id);
312                 else
313                         dev_info(&pf->pdev->dev,
314                                  "Filter deleted for PCTYPE %d loc = %d\n",
315                                  fd_data->pctype, fd_data->fd_id);
316         }
317
318         if (err)
319                 kfree(raw_packet);
320
321         return err ? -EOPNOTSUPP : 0;
322 }
323
324 /**
325  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
326  * a specific flow spec
327  * @vsi: pointer to the targeted VSI
328  * @fd_data: the flow director data required for the FDir descriptor
329  * @add: true adds a filter, false removes it
330  *
331  * Always returns -EOPNOTSUPP; SCTPv4 Flow Director filters are not supported
332  **/
333 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
334                                     struct i40e_fdir_filter *fd_data,
335                                     bool add)
336 {
337         return -EOPNOTSUPP;
338 }
339
340 #define I40E_IP_DUMMY_PACKET_LEN 34
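/* 34 = 14 (Ethernet) + 20 (IPv4); the dummy frame carries no L4 header and
 * ip->protocol is overwritten below.
 */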
341 /**
342  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
343  * a specific flow spec
344  * @vsi: pointer to the targeted VSI
345  * @fd_data: the flow director data required for the FDir descriptor
346  * @add: true adds a filter, false removes it
347  *
348  * Returns 0 if the filters were successfully added or removed
349  **/
350 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
351                                   struct i40e_fdir_filter *fd_data,
352                                   bool add)
353 {
354         struct i40e_pf *pf = vsi->back;
355         struct iphdr *ip;
356         bool err = false;
357         u8 *raw_packet;
358         int ret;
359         int i;
360         static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
361                 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
362                 0, 0, 0, 0};
363
364         for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
365              i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
366                 raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
367                 if (!raw_packet)
368                         return -ENOMEM;
369                 memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
370                 ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
371
372                 ip->saddr = fd_data->src_ip[0];
373                 ip->daddr = fd_data->dst_ip[0];
374                 ip->protocol = 0;
375
376                 fd_data->pctype = i;
377                 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
378
379                 if (ret) {
380                         dev_info(&pf->pdev->dev,
381                                  "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
382                                  fd_data->pctype, fd_data->fd_id, ret);
383                         err = true;
384                 } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
385                         if (add)
386                                 dev_info(&pf->pdev->dev,
387                                          "Filter OK for PCTYPE %d loc = %d\n",
388                                          fd_data->pctype, fd_data->fd_id);
389                         else
390                                 dev_info(&pf->pdev->dev,
391                                          "Filter deleted for PCTYPE %d loc = %d\n",
392                                          fd_data->pctype, fd_data->fd_id);
393                 }
394         }
395
396         if (err)
397                 kfree(raw_packet);
398
399         return err ? -EOPNOTSUPP : 0;
400 }
401
402 /**
403  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
404  * @vsi: pointer to the targeted VSI
405  * @input: the flow director filter to add or remove
406  * @add: true adds a filter, false removes it
407  *
408  **/
409 int i40e_add_del_fdir(struct i40e_vsi *vsi,
410                       struct i40e_fdir_filter *input, bool add)
411 {
412         struct i40e_pf *pf = vsi->back;
413         int ret;
414
415         switch (input->flow_type & ~FLOW_EXT) {
416         case TCP_V4_FLOW:
417                 ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
418                 break;
419         case UDP_V4_FLOW:
420                 ret = i40e_add_del_fdir_udpv4(vsi, input, add);
421                 break;
422         case SCTP_V4_FLOW:
423                 ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
424                 break;
425         case IPV4_FLOW:
426                 ret = i40e_add_del_fdir_ipv4(vsi, input, add);
427                 break;
428         case IP_USER_FLOW:
429                 switch (input->ip4_proto) {
430                 case IPPROTO_TCP:
431                         ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
432                         break;
433                 case IPPROTO_UDP:
434                         ret = i40e_add_del_fdir_udpv4(vsi, input, add);
435                         break;
436                 case IPPROTO_SCTP:
437                         ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
438                         break;
439                 default:
440                         ret = i40e_add_del_fdir_ipv4(vsi, input, add);
441                         break;
442                 }
443                 break;
444         default:
445                 dev_info(&pf->pdev->dev, "Unsupported flow type %d\n",
446                          input->flow_type);
447                 ret = -EINVAL;
448         }
449
450         /* The buffer allocated here is freed by i40e_clean_tx_ring() */
451         return ret;
452 }
453
454 /**
455  * i40e_fd_handle_status - check the Programming Status for FD
456  * @rx_ring: the Rx ring for this descriptor
457  * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
458  * @prog_id: the id originally used for programming
459  *
460  * This is used to verify whether the FD programming or invalidation
461  * requested of the HW by SW succeeded, and to take actions accordingly.
462  **/
463 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
464                                   union i40e_rx_desc *rx_desc, u8 prog_id)
465 {
466         struct i40e_pf *pf = rx_ring->vsi->back;
467         struct pci_dev *pdev = pf->pdev;
468         u32 fcnt_prog, fcnt_avail;
469         u32 error;
470         u64 qw;
471
472         qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
473         error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
474                 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
475
476         if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
477                 pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
478                 if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
479                     (I40E_DEBUG_FD & pf->hw.debug_mask))
480                         dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
481                                  pf->fd_inv);
482
483                 /* Check if the programming error is for ATR.
484                  * If so, auto disable ATR and set a state for
485                  * flush in progress. Next time we come here, if a flush is in
486                  * progress, do nothing; once the flush is complete the state
487                  * will be cleared.
488                  */
489                 if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state))
490                         return;
491
492                 pf->fd_add_err++;
493                 /* store the current atr filter count */
494                 pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
495
496                 if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
497                     (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) {
498                         pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED;
499                         set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state);
500                 }
501
502                 /* filter programming failed most likely due to table full */
503                 fcnt_prog = i40e_get_global_fd_count(pf);
504                 fcnt_avail = pf->fdir_pf_filter_count;
505                 /* If ATR is running fcnt_prog can quickly change,
506                  * if we are very close to full, it makes sense to disable
507                  * FD ATR/SB and then re-enable it when there is room.
508                  */
509                 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
510                         if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
511                             !(pf->auto_disable_flags &
512                                      I40E_FLAG_FD_SB_ENABLED)) {
513                                 if (I40E_DEBUG_FD & pf->hw.debug_mask)
514                                         dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
515                                 pf->auto_disable_flags |=
516                                                         I40E_FLAG_FD_SB_ENABLED;
517                         }
518                 }
519         } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
520                 if (I40E_DEBUG_FD & pf->hw.debug_mask)
521                         dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
522                                  rx_desc->wb.qword0.hi_dword.fd_id);
523         }
524 }
525
526 /**
527  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
528  * @ring:      the ring that owns the buffer
529  * @tx_buffer: the buffer to free
530  **/
531 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
532                                             struct i40e_tx_buffer *tx_buffer)
533 {
534         if (tx_buffer->skb) {
535                 dev_kfree_skb_any(tx_buffer->skb);
536                 if (dma_unmap_len(tx_buffer, len))
537                         dma_unmap_single(ring->dev,
538                                          dma_unmap_addr(tx_buffer, dma),
539                                          dma_unmap_len(tx_buffer, len),
540                                          DMA_TO_DEVICE);
541         } else if (dma_unmap_len(tx_buffer, len)) {
542                 dma_unmap_page(ring->dev,
543                                dma_unmap_addr(tx_buffer, dma),
544                                dma_unmap_len(tx_buffer, len),
545                                DMA_TO_DEVICE);
546         }
547
548         if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
549                 kfree(tx_buffer->raw_buf);
550
551         tx_buffer->next_to_watch = NULL;
552         tx_buffer->skb = NULL;
553         dma_unmap_len_set(tx_buffer, len, 0);
554         /* tx_buffer must be completely set up in the transmit path */
555 }
556
557 /**
558  * i40e_clean_tx_ring - Free any empty Tx buffers
559  * @tx_ring: ring to be cleaned
560  **/
561 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
562 {
563         unsigned long bi_size;
564         u16 i;
565
566         /* ring already cleared, nothing to do */
567         if (!tx_ring->tx_bi)
568                 return;
569
570         /* Free all the Tx ring sk_buffs */
571         for (i = 0; i < tx_ring->count; i++)
572                 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
573
574         bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
575         memset(tx_ring->tx_bi, 0, bi_size);
576
577         /* Zero out the descriptor ring */
578         memset(tx_ring->desc, 0, tx_ring->size);
579
580         tx_ring->next_to_use = 0;
581         tx_ring->next_to_clean = 0;
582
583         if (!tx_ring->netdev)
584                 return;
585
586         /* cleanup Tx queue statistics */
587         netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
588                                                   tx_ring->queue_index));
589 }
590
591 /**
592  * i40e_free_tx_resources - Free Tx resources per queue
593  * @tx_ring: Tx descriptor ring for a specific queue
594  *
595  * Free all transmit software resources
596  **/
597 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
598 {
599         i40e_clean_tx_ring(tx_ring);
600         kfree(tx_ring->tx_bi);
601         tx_ring->tx_bi = NULL;
602
603         if (tx_ring->desc) {
604                 dma_free_coherent(tx_ring->dev, tx_ring->size,
605                                   tx_ring->desc, tx_ring->dma);
606                 tx_ring->desc = NULL;
607         }
608 }
609
610 /**
611  * i40e_get_tx_pending - how many tx descriptors not processed
612  * @ring: the ring of descriptors
613  * @in_sw: is tx_pending being checked in SW or HW
614  *
615  * Since there is no access to the ring head register
616  * in XL710, we need to use our local copies
617  **/
618 u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
619 {
620         u32 head, tail;
621
622         if (!in_sw)
623                 head = i40e_get_head(ring);
624         else
625                 head = ring->next_to_clean;
626         tail = readl(ring->tail);
627
628         if (head != tail)
629                 return (head < tail) ?
630                         tail - head : (tail + ring->count - head);
631
632         return 0;
633 }
634
635 #define WB_STRIDE 0x3
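/* i40e_clean_tx_irq() below arms a descriptor write-back when fewer than
 * WB_STRIDE + 1 (i.e. 4) descriptors, but at least one, are still pending.
 */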
636
637 /**
638  * i40e_clean_tx_irq - Reclaim resources after transmit completes
639  * @tx_ring:  tx ring to clean
640  * @budget:   how many cleans we're allowed
641  *
642  * Returns true if there's any budget left (i.e. the clean is finished)
643  **/
644 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
645 {
646         u16 i = tx_ring->next_to_clean;
647         struct i40e_tx_buffer *tx_buf;
648         struct i40e_tx_desc *tx_head;
649         struct i40e_tx_desc *tx_desc;
650         unsigned int total_packets = 0;
651         unsigned int total_bytes = 0;
652
653         tx_buf = &tx_ring->tx_bi[i];
654         tx_desc = I40E_TX_DESC(tx_ring, i);
655         i -= tx_ring->count;
656
657         tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
658
659         do {
660                 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
661
662                 /* if next_to_watch is not set then there is no work pending */
663                 if (!eop_desc)
664                         break;
665
666                 /* prevent any other reads prior to eop_desc */
667                 read_barrier_depends();
668
669                 /* we have caught up to head, no work left to do */
670                 if (tx_head == tx_desc)
671                         break;
672
673                 /* clear next_to_watch to prevent false hangs */
674                 tx_buf->next_to_watch = NULL;
675
676                 /* update the statistics for this packet */
677                 total_bytes += tx_buf->bytecount;
678                 total_packets += tx_buf->gso_segs;
679
680                 /* free the skb */
681                 dev_consume_skb_any(tx_buf->skb);
682
683                 /* unmap skb header data */
684                 dma_unmap_single(tx_ring->dev,
685                                  dma_unmap_addr(tx_buf, dma),
686                                  dma_unmap_len(tx_buf, len),
687                                  DMA_TO_DEVICE);
688
689                 /* clear tx_buffer data */
690                 tx_buf->skb = NULL;
691                 dma_unmap_len_set(tx_buf, len, 0);
692
693                 /* unmap remaining buffers */
694                 while (tx_desc != eop_desc) {
695
696                         tx_buf++;
697                         tx_desc++;
698                         i++;
699                         if (unlikely(!i)) {
700                                 i -= tx_ring->count;
701                                 tx_buf = tx_ring->tx_bi;
702                                 tx_desc = I40E_TX_DESC(tx_ring, 0);
703                         }
704
705                         /* unmap any remaining paged data */
706                         if (dma_unmap_len(tx_buf, len)) {
707                                 dma_unmap_page(tx_ring->dev,
708                                                dma_unmap_addr(tx_buf, dma),
709                                                dma_unmap_len(tx_buf, len),
710                                                DMA_TO_DEVICE);
711                                 dma_unmap_len_set(tx_buf, len, 0);
712                         }
713                 }
714
715                 /* move us one more past the eop_desc for start of next pkt */
716                 tx_buf++;
717                 tx_desc++;
718                 i++;
719                 if (unlikely(!i)) {
720                         i -= tx_ring->count;
721                         tx_buf = tx_ring->tx_bi;
722                         tx_desc = I40E_TX_DESC(tx_ring, 0);
723                 }
724
725                 prefetch(tx_desc);
726
727                 /* update budget accounting */
728                 budget--;
729         } while (likely(budget));
730
731         i += tx_ring->count;
732         tx_ring->next_to_clean = i;
733         u64_stats_update_begin(&tx_ring->syncp);
734         tx_ring->stats.bytes += total_bytes;
735         tx_ring->stats.packets += total_packets;
736         u64_stats_update_end(&tx_ring->syncp);
737         tx_ring->q_vector->tx.total_bytes += total_bytes;
738         tx_ring->q_vector->tx.total_packets += total_packets;
739
740         if (tx_ring->flags & I40E_TXR_FLAGS_WB_ON_ITR) {
741                 unsigned int j = 0;
742
743                 /* check to see if there are < 4 descriptors
744                  * waiting to be written back; if so, kick the hardware to
745                  * force them to be written back in case we stay in NAPI.
746                  * In this mode on X722 we do not enable interrupts.
747                  */
748                 j = i40e_get_tx_pending(tx_ring, false);
749
750                 if (budget &&
751                     ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
752                     !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
753                     (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
754                         tx_ring->arm_wb = true;
755         }
756
757         netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
758                                                       tx_ring->queue_index),
759                                   total_packets, total_bytes);
760
761 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
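/* Wake the queue only once at least TX_WAKE_THRESHOLD descriptors are free
 * again, so it is not immediately stopped after being restarted
 * (DESC_NEEDED, from the driver headers, is presumably the worst-case
 * descriptor count for a single frame).
 */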
762         if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
763                      (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
764                 /* Make sure that anybody stopping the queue after this
765                  * sees the new next_to_clean.
766                  */
767                 smp_mb();
768                 if (__netif_subqueue_stopped(tx_ring->netdev,
769                                              tx_ring->queue_index) &&
770                    !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
771                         netif_wake_subqueue(tx_ring->netdev,
772                                             tx_ring->queue_index);
773                         ++tx_ring->tx_stats.restart_queue;
774                 }
775         }
776
777         return !!budget;
778 }
779
780 /**
781  * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
782  * @vsi: the VSI we care about
783  * @q_vector: the vector on which to enable writeback
784  *
785  **/
786 static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
787                                   struct i40e_q_vector *q_vector)
788 {
789         u16 flags = q_vector->tx.ring[0].flags;
790         u32 val;
791
792         if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
793                 return;
794
795         if (q_vector->arm_wb_state)
796                 return;
797
798         if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
799                 val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
800                       I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
801
802                 wr32(&vsi->back->hw,
803                      I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
804                      val);
805         } else {
806                 val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
807                       I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
808
809                 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
810         }
811         q_vector->arm_wb_state = true;
812 }
813
814 /**
815  * i40e_force_wb - Issue SW Interrupt so HW does a wb
816  * @vsi: the VSI we care about
817  * @q_vector: the vector on which to force writeback
818  *
819  **/
820 void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
821 {
822         if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
823                 u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
824                           I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
825                           I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
826                           I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
827                           /* allow 00 to be written to the index */
828
829                 wr32(&vsi->back->hw,
830                      I40E_PFINT_DYN_CTLN(q_vector->v_idx +
831                                          vsi->base_vector - 1), val);
832         } else {
833                 u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
834                           I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
835                           I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
836                           I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
837                         /* allow 00 to be written to the index */
838
839                 wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
840         }
841 }
842
843 /**
844  * i40e_set_new_dynamic_itr - Find new ITR level
845  * @rc: structure containing ring performance data
846  *
847  * Returns true if ITR changed, false if not
848  *
849  * Stores a new ITR value based on packets and byte counts during
850  * the last interrupt.  The advantage of per interrupt computation
851  * is faster updates and more accurate ITR for the current traffic
852  * pattern.  Constants in this function were computed based on
853  * theoretical maximum wire speed and thresholds were set based on
854  * testing data as well as attempting to minimize response time
855  * while increasing bulk throughput.
856  **/
857 static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
858 {
859         enum i40e_latency_range new_latency_range = rc->latency_range;
860         struct i40e_q_vector *qv = rc->ring->q_vector;
861         u32 new_itr = rc->itr;
862         int bytes_per_int;
863         int usecs;
864
865         if (rc->total_packets == 0 || !rc->itr)
866                 return false;
867
868         /* simple throttlerate management
869          *   0-10MB/s   lowest (50000 ints/s)
870          *  10-20MB/s   low    (20000 ints/s)
871          *  20-1249MB/s bulk   (18000 ints/s)
872          *  > 40000 Rx packets per second (8000 ints/s)
873          *
874          * The math works out because the divisor is in 10^(-6) which
875          * turns the bytes/us input value into MB/s values, but
876          * make sure to use usecs, as the register values written
877          * are in 2 usec increments in the ITR registers, and make sure
878          * to use the smoothed values that the countdown timer gives us.
879          */
880         usecs = (rc->itr << 1) * ITR_COUNTDOWN_START;
881         bytes_per_int = rc->total_bytes / usecs;
882
883         switch (new_latency_range) {
884         case I40E_LOWEST_LATENCY:
885                 if (bytes_per_int > 10)
886                         new_latency_range = I40E_LOW_LATENCY;
887                 break;
888         case I40E_LOW_LATENCY:
889                 if (bytes_per_int > 20)
890                         new_latency_range = I40E_BULK_LATENCY;
891                 else if (bytes_per_int <= 10)
892                         new_latency_range = I40E_LOWEST_LATENCY;
893                 break;
894         case I40E_BULK_LATENCY:
895         case I40E_ULTRA_LATENCY:
896         default:
897                 if (bytes_per_int <= 20)
898                         new_latency_range = I40E_LOW_LATENCY;
899                 break;
900         }
901
902         /* this is to adjust RX more aggressively when streaming small
903          * packets.  The value of 40000 was picked as it is just beyond
904          * what the hardware can receive per second if in low latency
905          * mode.
906          */
907 #define RX_ULTRA_PACKET_RATE 40000
908
909         if ((((rc->total_packets * 1000000) / usecs) > RX_ULTRA_PACKET_RATE) &&
910             (&qv->rx == rc))
911                 new_latency_range = I40E_ULTRA_LATENCY;
912
913         rc->latency_range = new_latency_range;
914
915         switch (new_latency_range) {
916         case I40E_LOWEST_LATENCY:
917                 new_itr = I40E_ITR_50K;
918                 break;
919         case I40E_LOW_LATENCY:
920                 new_itr = I40E_ITR_20K;
921                 break;
922         case I40E_BULK_LATENCY:
923                 new_itr = I40E_ITR_18K;
924                 break;
925         case I40E_ULTRA_LATENCY:
926                 new_itr = I40E_ITR_8K;
927                 break;
928         default:
929                 break;
930         }
931
932         rc->total_bytes = 0;
933         rc->total_packets = 0;
934
935         if (new_itr != rc->itr) {
936                 rc->itr = new_itr;
937                 return true;
938         }
939
940         return false;
941 }
942
943 /**
944  * i40e_clean_programming_status - clean the programming status descriptor
945  * @rx_ring: the rx ring that has this descriptor
946  * @rx_desc: the rx descriptor written back by HW
947  *
948  * Flow Director should handle FD_FILTER_STATUS to check whether its filter
949  * programming succeeded and take actions accordingly. FCoE should handle its
950  * context/filter programming/invalidation status and take actions.
951  *
952  **/
953 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
954                                           union i40e_rx_desc *rx_desc)
955 {
956         u64 qw;
957         u8 id;
958
959         qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
960         id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
961                   I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
962
963         if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
964                 i40e_fd_handle_status(rx_ring, rx_desc, id);
965 #ifdef I40E_FCOE
966         else if ((id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS) ||
967                  (id == I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS))
968                 i40e_fcoe_handle_status(rx_ring, rx_desc, id);
969 #endif
970 }
971
972 /**
973  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
974  * @tx_ring: the tx ring to set up
975  *
976  * Return 0 on success, negative on error
977  **/
978 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
979 {
980         struct device *dev = tx_ring->dev;
981         int bi_size;
982
983         if (!dev)
984                 return -ENOMEM;
985
986         /* warn if we are about to overwrite the pointer */
987         WARN_ON(tx_ring->tx_bi);
988         bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
989         tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
990         if (!tx_ring->tx_bi)
991                 goto err;
992
993         /* round up to nearest 4K */
994         tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
995         /* add u32 for head writeback, align after this takes care of
996          * guaranteeing this is at least one cache line in size
997          */
998         tx_ring->size += sizeof(u32);
999         tx_ring->size = ALIGN(tx_ring->size, 4096);
1000         tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
1001                                            &tx_ring->dma, GFP_KERNEL);
1002         if (!tx_ring->desc) {
1003                 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
1004                          tx_ring->size);
1005                 goto err;
1006         }
1007
1008         tx_ring->next_to_use = 0;
1009         tx_ring->next_to_clean = 0;
1010         return 0;
1011
1012 err:
1013         kfree(tx_ring->tx_bi);
1014         tx_ring->tx_bi = NULL;
1015         return -ENOMEM;
1016 }
1017
1018 /**
1019  * i40e_clean_rx_ring - Free Rx buffers
1020  * @rx_ring: ring to be cleaned
1021  **/
1022 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
1023 {
1024         struct device *dev = rx_ring->dev;
1025         struct i40e_rx_buffer *rx_bi;
1026         unsigned long bi_size;
1027         u16 i;
1028
1029         /* ring already cleared, nothing to do */
1030         if (!rx_ring->rx_bi)
1031                 return;
1032
1033         if (ring_is_ps_enabled(rx_ring)) {
1034                 int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
1035
1036                 rx_bi = &rx_ring->rx_bi[0];
1037                 if (rx_bi->hdr_buf) {
1038                         dma_free_coherent(dev,
1039                                           bufsz,
1040                                           rx_bi->hdr_buf,
1041                                           rx_bi->dma);
1042                         for (i = 0; i < rx_ring->count; i++) {
1043                                 rx_bi = &rx_ring->rx_bi[i];
1044                                 rx_bi->dma = 0;
1045                                 rx_bi->hdr_buf = NULL;
1046                         }
1047                 }
1048         }
1049         /* Free all the Rx ring sk_buffs */
1050         for (i = 0; i < rx_ring->count; i++) {
1051                 rx_bi = &rx_ring->rx_bi[i];
1052                 if (rx_bi->dma) {
1053                         dma_unmap_single(dev,
1054                                          rx_bi->dma,
1055                                          rx_ring->rx_buf_len,
1056                                          DMA_FROM_DEVICE);
1057                         rx_bi->dma = 0;
1058                 }
1059                 if (rx_bi->skb) {
1060                         dev_kfree_skb(rx_bi->skb);
1061                         rx_bi->skb = NULL;
1062                 }
1063                 if (rx_bi->page) {
1064                         if (rx_bi->page_dma) {
1065                                 dma_unmap_page(dev,
1066                                                rx_bi->page_dma,
1067                                                PAGE_SIZE,
1068                                                DMA_FROM_DEVICE);
1069                                 rx_bi->page_dma = 0;
1070                         }
1071                         __free_page(rx_bi->page);
1072                         rx_bi->page = NULL;
1073                         rx_bi->page_offset = 0;
1074                 }
1075         }
1076
1077         bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1078         memset(rx_ring->rx_bi, 0, bi_size);
1079
1080         /* Zero out the descriptor ring */
1081         memset(rx_ring->desc, 0, rx_ring->size);
1082
1083         rx_ring->next_to_clean = 0;
1084         rx_ring->next_to_use = 0;
1085 }
1086
1087 /**
1088  * i40e_free_rx_resources - Free Rx resources
1089  * @rx_ring: ring to clean the resources from
1090  *
1091  * Free all receive software resources
1092  **/
1093 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1094 {
1095         i40e_clean_rx_ring(rx_ring);
1096         kfree(rx_ring->rx_bi);
1097         rx_ring->rx_bi = NULL;
1098
1099         if (rx_ring->desc) {
1100                 dma_free_coherent(rx_ring->dev, rx_ring->size,
1101                                   rx_ring->desc, rx_ring->dma);
1102                 rx_ring->desc = NULL;
1103         }
1104 }
1105
1106 /**
1107  * i40e_alloc_rx_headers - allocate rx header buffers
1108  * @rx_ring: ring to alloc buffers
1109  *
1110  * Allocate rx header buffers for the entire ring. As these are static,
1111  * this is only called when setting up a new ring.
1112  **/
1113 void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
1114 {
1115         struct device *dev = rx_ring->dev;
1116         struct i40e_rx_buffer *rx_bi;
1117         dma_addr_t dma;
1118         void *buffer;
1119         int buf_size;
1120         int i;
1121
1122         if (rx_ring->rx_bi[0].hdr_buf)
1123                 return;
1124         /* Make sure the buffers don't cross cache line boundaries. */
1125         buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
1126         buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
1127                                     &dma, GFP_KERNEL);
1128         if (!buffer)
1129                 return;
1130         for (i = 0; i < rx_ring->count; i++) {
1131                 rx_bi = &rx_ring->rx_bi[i];
1132                 rx_bi->dma = dma + (i * buf_size);
1133                 rx_bi->hdr_buf = buffer + (i * buf_size);
1134         }
1135 }
1136
1137 /**
1138  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1139  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1140  *
1141  * Returns 0 on success, negative on failure
1142  **/
1143 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1144 {
1145         struct device *dev = rx_ring->dev;
1146         int bi_size;
1147
1148         /* warn if we are about to overwrite the pointer */
1149         WARN_ON(rx_ring->rx_bi);
1150         bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1151         rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1152         if (!rx_ring->rx_bi)
1153                 goto err;
1154
1155         u64_stats_init(&rx_ring->syncp);
1156
1157         /* Round up to nearest 4K */
1158         rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1159                 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1160                 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1161         rx_ring->size = ALIGN(rx_ring->size, 4096);
1162         rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1163                                            &rx_ring->dma, GFP_KERNEL);
1164
1165         if (!rx_ring->desc) {
1166                 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1167                          rx_ring->size);
1168                 goto err;
1169         }
1170
1171         rx_ring->next_to_clean = 0;
1172         rx_ring->next_to_use = 0;
1173
1174         return 0;
1175 err:
1176         kfree(rx_ring->rx_bi);
1177         rx_ring->rx_bi = NULL;
1178         return -ENOMEM;
1179 }
1180
1181 /**
1182  * i40e_release_rx_desc - Store the new tail and head values
1183  * @rx_ring: ring to bump
1184  * @val: new next_to_use (tail) index to write back to hardware
1185  **/
1186 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1187 {
1188         rx_ring->next_to_use = val;
1189         /* Force memory writes to complete before letting h/w
1190          * know there are new descriptors to fetch.  (Only
1191          * applicable for weak-ordered memory model archs,
1192          * such as IA-64).
1193          */
1194         wmb();
1195         writel(val, rx_ring->tail);
1196 }
1197
1198 /**
1199  * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
1200  * @rx_ring: ring to place buffers on
1201  * @cleaned_count: number of buffers to replace
1202  *
1203  * Returns true if any errors on allocation
1204  **/
1205 bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
1206 {
1207         u16 i = rx_ring->next_to_use;
1208         union i40e_rx_desc *rx_desc;
1209         struct i40e_rx_buffer *bi;
1210         const int current_node = numa_node_id();
1211
1212         /* do nothing if no valid netdev defined */
1213         if (!rx_ring->netdev || !cleaned_count)
1214                 return false;
1215
1216         while (cleaned_count--) {
1217                 rx_desc = I40E_RX_DESC(rx_ring, i);
1218                 bi = &rx_ring->rx_bi[i];
1219
1220                 if (bi->skb) /* desc is in use */
1221                         goto no_buffers;
1222
1223                 /* If we've been moved to a different NUMA node, release the
1224                  * page so we can get a new one on the current node.
1225                  */
1226                 if (bi->page && page_to_nid(bi->page) != current_node) {
1227                         dma_unmap_page(rx_ring->dev,
1228                                        bi->page_dma,
1229                                        PAGE_SIZE,
1230                                        DMA_FROM_DEVICE);
1231                         __free_page(bi->page);
1232                         bi->page = NULL;
1233                         bi->page_dma = 0;
1234                         rx_ring->rx_stats.realloc_count++;
1235                 } else if (bi->page) {
1236                         rx_ring->rx_stats.page_reuse_count++;
1237                 }
1238
1239                 if (!bi->page) {
1240                         bi->page = alloc_page(GFP_ATOMIC);
1241                         if (!bi->page) {
1242                                 rx_ring->rx_stats.alloc_page_failed++;
1243                                 goto no_buffers;
1244                         }
1245                         bi->page_dma = dma_map_page(rx_ring->dev,
1246                                                     bi->page,
1247                                                     0,
1248                                                     PAGE_SIZE,
1249                                                     DMA_FROM_DEVICE);
1250                         if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
1251                                 rx_ring->rx_stats.alloc_page_failed++;
1252                                 __free_page(bi->page);
1253                                 bi->page = NULL;
1254                                 bi->page_dma = 0;
1255                                 bi->page_offset = 0;
1256                                 goto no_buffers;
1257                         }
1258                         bi->page_offset = 0;
1259                 }
1260
1261                 /* Refresh the desc even if buffer_addrs didn't change
1262                  * because each write-back erases this info.
1263                  */
1264                 rx_desc->read.pkt_addr =
1265                                 cpu_to_le64(bi->page_dma + bi->page_offset);
1266                 rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1267                 i++;
1268                 if (i == rx_ring->count)
1269                         i = 0;
1270         }
1271
1272         if (rx_ring->next_to_use != i)
1273                 i40e_release_rx_desc(rx_ring, i);
1274
1275         return false;
1276
1277 no_buffers:
1278         if (rx_ring->next_to_use != i)
1279                 i40e_release_rx_desc(rx_ring, i);
1280
1281         /* make sure to come back via polling to try again after
1282          * allocation failure
1283          */
1284         return true;
1285 }
1286
1287 /**
1288  * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
1289  * @rx_ring: ring to place buffers on
1290  * @cleaned_count: number of buffers to replace
1291  *
1292  * Returns true if any errors on allocation
1293  **/
1294 bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
1295 {
1296         u16 i = rx_ring->next_to_use;
1297         union i40e_rx_desc *rx_desc;
1298         struct i40e_rx_buffer *bi;
1299         struct sk_buff *skb;
1300
1301         /* do nothing if no valid netdev defined */
1302         if (!rx_ring->netdev || !cleaned_count)
1303                 return false;
1304
1305         while (cleaned_count--) {
1306                 rx_desc = I40E_RX_DESC(rx_ring, i);
1307                 bi = &rx_ring->rx_bi[i];
1308                 skb = bi->skb;
1309
1310                 if (!skb) {
1311                         skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
1312                                                           rx_ring->rx_buf_len,
1313                                                           GFP_ATOMIC |
1314                                                           __GFP_NOWARN);
1315                         if (!skb) {
1316                                 rx_ring->rx_stats.alloc_buff_failed++;
1317                                 goto no_buffers;
1318                         }
1319                         /* initialize queue mapping */
1320                         skb_record_rx_queue(skb, rx_ring->queue_index);
1321                         bi->skb = skb;
1322                 }
1323
1324                 if (!bi->dma) {
1325                         bi->dma = dma_map_single(rx_ring->dev,
1326                                                  skb->data,
1327                                                  rx_ring->rx_buf_len,
1328                                                  DMA_FROM_DEVICE);
1329                         if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1330                                 rx_ring->rx_stats.alloc_buff_failed++;
1331                                 bi->dma = 0;
1332                                 dev_kfree_skb(bi->skb);
1333                                 bi->skb = NULL;
1334                                 goto no_buffers;
1335                         }
1336                 }
1337
1338                 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1339                 rx_desc->read.hdr_addr = 0;
1340                 i++;
1341                 if (i == rx_ring->count)
1342                         i = 0;
1343         }
1344
1345         if (rx_ring->next_to_use != i)
1346                 i40e_release_rx_desc(rx_ring, i);
1347
1348         return false;
1349
1350 no_buffers:
1351         if (rx_ring->next_to_use != i)
1352                 i40e_release_rx_desc(rx_ring, i);
1353
1354         /* make sure to come back via polling to try again after
1355          * allocation failure
1356          */
1357         return true;
1358 }
1359
1360 /**
1361  * i40e_receive_skb - Send a completed packet up the stack
1362  * @rx_ring:  rx ring in play
1363  * @skb: packet to send up
1364  * @vlan_tag: vlan tag for packet
1365  **/
1366 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1367                              struct sk_buff *skb, u16 vlan_tag)
1368 {
1369         struct i40e_q_vector *q_vector = rx_ring->q_vector;
1370
1371         if (vlan_tag & VLAN_VID_MASK)
1372                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1373
1374         napi_gro_receive(&q_vector->napi, skb);
1375 }
1376
1377 /**
1378  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1379  * @vsi: the VSI we care about
1380  * @skb: skb currently being received and modified
1381  * @rx_status: status value of last descriptor in packet
1382  * @rx_error: error value of last descriptor in packet
1383  * @rx_ptype: ptype value of last descriptor in packet
1384  **/
1385 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1386                                     struct sk_buff *skb,
1387                                     u32 rx_status,
1388                                     u32 rx_error,
1389                                     u16 rx_ptype)
1390 {
1391         struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
1392         bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel;
1393
1394         skb->ip_summed = CHECKSUM_NONE;
1395
1396         /* has the user enabled Rx checksum offload? */
1397         if (!(vsi->netdev->features & NETIF_F_RXCSUM))
1398                 return;
1399
1400         /* did the hardware decode the packet and checksum? */
1401         if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1402                 return;
1403
1404         /* both known and outer_ip must be set for the below code to work */
1405         if (!(decoded.known && decoded.outer_ip))
1406                 return;
1407
1408         ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1409                (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
1410         ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
1411                (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
1412
1413         if (ipv4 &&
1414             (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
1415                          BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
1416                 goto checksum_fail;
1417
1418         /* likely incorrect csum if alternate IP extension headers found */
1419         if (ipv6 &&
1420             rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1421                 /* don't increment checksum err here, non-fatal err */
1422                 return;
1423
1424         /* there was some L4 error, count error and punt packet to the stack */
1425         if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
1426                 goto checksum_fail;
1427
1428         /* handle packets that were not able to be checksummed due
1429          * to arrival speed, in this case the stack can compute
1430          * the csum.
1431          */
1432         if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
1433                 return;
1434
1435         /* The hardware supported by this driver does not validate outer
1436          * checksums for tunneled VXLAN or GENEVE frames.  The specification
1437          * only states that the outer checksum "MAY" be validated, so it is
1438          * not a hard requirement; if we have validated the inner checksum,
1439          * report CHECKSUM_UNNECESSARY.
1440          */
1441
1442         ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1443                      (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1444         ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1445                      (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1446
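             /* for the tunneled (GRENAT) ptypes the checksum we validated is
              * the inner one, so report one extra checksum level
              * (csum_level = 1); see the CHECKSUM_UNNECESSARY notes in
              * include/linux/skbuff.h for the exact semantics the stack uses
              */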
1447         skb->ip_summed = CHECKSUM_UNNECESSARY;
1448         skb->csum_level = ipv4_tunnel || ipv6_tunnel;
1449
1450         return;
1451
1452 checksum_fail:
1453         vsi->back->hw_csum_rx_error++;
1454 }
1455
1456 /**
1457  * i40e_ptype_to_htype - get a hash type
1458  * @ptype: the ptype value from the descriptor
1459  *
1460  * Returns a hash type to be used by skb_set_hash
1461  **/
1462 static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
1463 {
1464         struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1465
1466         if (!decoded.known)
1467                 return PKT_HASH_TYPE_NONE;
1468
1469         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1470             decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1471                 return PKT_HASH_TYPE_L4;
1472         else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1473                  decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1474                 return PKT_HASH_TYPE_L3;
1475         else
1476                 return PKT_HASH_TYPE_L2;
1477 }
1478
1479 /**
1480  * i40e_rx_hash - set the hash value in the skb
1481  * @ring: descriptor ring
1482  * @rx_desc: specific descriptor
      * @skb: skb currently being populated
      * @rx_ptype: the packet type value from the descriptor
1483  **/
1484 static inline void i40e_rx_hash(struct i40e_ring *ring,
1485                                 union i40e_rx_desc *rx_desc,
1486                                 struct sk_buff *skb,
1487                                 u8 rx_ptype)
1488 {
1489         u32 hash;
1490         const __le64 rss_mask  =
1491                 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1492                             I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1493
1494         if (!(ring->netdev->features & NETIF_F_RXHASH))
1495                 return;
1496
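             /* qword0.hi_dword is a union of the RSS hash, the flow director
              * filter id, etc., so only treat it as a hash when FLTSTAT
              * reports that an RSS hash is present
              */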
1497         if ((rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask) {
1498                 hash = le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1499                 skb_set_hash(skb, hash, i40e_ptype_to_htype(rx_ptype));
1500         }
1501 }
1502
1503 /**
1504  * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
1505  * @rx_ring:  rx ring to clean
1506  * @budget:   how many cleans we're allowed
1507  *
1508  * Returns number of packets cleaned
1509  **/
1510 static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
1511 {
1512         unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1513         u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1514         u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1515         struct i40e_vsi *vsi = rx_ring->vsi;
1516         u16 i = rx_ring->next_to_clean;
1517         union i40e_rx_desc *rx_desc;
1518         u32 rx_error, rx_status;
1519         bool failure = false;
1520         u8 rx_ptype;
1521         u64 qword;
1522         u32 copysize;
1523
1524         if (budget <= 0)
1525                 return 0;
1526
1527         do {
1528                 struct i40e_rx_buffer *rx_bi;
1529                 struct sk_buff *skb;
1530                 u16 vlan_tag;
1531                 /* return some buffers to hardware, one at a time is too slow */
1532                 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1533                         failure = failure ||
1534                                   i40e_alloc_rx_buffers_ps(rx_ring,
1535                                                            cleaned_count);
1536                         cleaned_count = 0;
1537                 }
1538
1539                 i = rx_ring->next_to_clean;
1540                 rx_desc = I40E_RX_DESC(rx_ring, i);
1541                 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1542                 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1543                         I40E_RXD_QW1_STATUS_SHIFT;
1544
1545                 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1546                         break;
1547
1548                 /* This memory barrier is needed to keep us from reading
1549                  * any other fields out of the rx_desc until we know the
1550                  * DD bit is set.
1551                  */
1552                 dma_rmb();
1553                 /* sync header buffer for reading */
1554                 dma_sync_single_range_for_cpu(rx_ring->dev,
1555                                               rx_ring->rx_bi[0].dma,
1556                                               i * rx_ring->rx_hdr_len,
1557                                               rx_ring->rx_hdr_len,
1558                                               DMA_FROM_DEVICE);
1559                 if (i40e_rx_is_programming_status(qword)) {
1560                         i40e_clean_programming_status(rx_ring, rx_desc);
1561                         I40E_RX_INCREMENT(rx_ring, i);
1562                         continue;
1563                 }
1564                 rx_bi = &rx_ring->rx_bi[i];
1565                 skb = rx_bi->skb;
1566                 if (likely(!skb)) {
1567                         skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
1568                                                           rx_ring->rx_hdr_len,
1569                                                           GFP_ATOMIC |
1570                                                           __GFP_NOWARN);
1571                         if (!skb) {
1572                                 rx_ring->rx_stats.alloc_buff_failed++;
1573                                 failure = true;
1574                                 break;
1575                         }
1576
1577                         /* initialize queue mapping */
1578                         skb_record_rx_queue(skb, rx_ring->queue_index);
1579                         /* we are reusing so sync this buffer for CPU use */
1580                         dma_sync_single_range_for_cpu(rx_ring->dev,
1581                                                       rx_ring->rx_bi[0].dma,
1582                                                       i * rx_ring->rx_hdr_len,
1583                                                       rx_ring->rx_hdr_len,
1584                                                       DMA_FROM_DEVICE);
1585                 }
1586                 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1587                                 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1588                 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1589                                 I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1590                 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1591                          I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1592
1593                 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1594                            I40E_RXD_QW1_ERROR_SHIFT;
1595                 rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1596                 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1597
1598                 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1599                            I40E_RXD_QW1_PTYPE_SHIFT;
1600                 /* sync half-page for reading */
1601                 dma_sync_single_range_for_cpu(rx_ring->dev,
1602                                               rx_bi->page_dma,
1603                                               rx_bi->page_offset,
1604                                               PAGE_SIZE / 2,
1605                                               DMA_FROM_DEVICE);
1606                 prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
1607                 rx_bi->skb = NULL;
1608                 cleaned_count++;
1609                 copysize = 0;
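                     /* header split (SPH) or header-buffer overflow (HBO)
                      * means the headers landed in the header buffer, so pull
                      * them from there; otherwise copy the start of the packet
                      * out of the page into the linear area and remember how
                      * much was copied (copysize) so the page fragment added
                      * below starts right after it
                      */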
1610                 if (rx_hbo || rx_sph) {
1611                         int len;
1612
1613                         if (rx_hbo)
1614                                 len = I40E_RX_HDR_SIZE;
1615                         else
1616                                 len = rx_header_len;
1617                         memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
1618                 } else if (skb->len == 0) {
1619                         int len;
1620                         unsigned char *va = page_address(rx_bi->page) +
1621                                             rx_bi->page_offset;
1622
1623                         len = min(rx_packet_len, rx_ring->rx_hdr_len);
1624                         memcpy(__skb_put(skb, len), va, len);
1625                         copysize = len;
1626                         rx_packet_len -= len;
1627                 }
1628                 /* Get the rest of the data if this was a header split */
1629                 if (rx_packet_len) {
1630                         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
1631                                         rx_bi->page,
1632                                         rx_bi->page_offset + copysize,
1633                                         rx_packet_len, I40E_RXBUFFER_2048);
1634
1635                         /* If the page count is more than 2, then both halves
1636                          * of the page are used and we need to free it. Do it
1637                          * here instead of in the alloc code. Otherwise one
1638                          * of the half-pages might be released between now and
1639                          * then, and we wouldn't know which one to use.
1640                          * Don't call get_page and free_page since those are
1641                          * both expensive atomic operations that just change
1642                          * the refcount in opposite directions. Just give the
1643                          * page to the stack; it can have our refcount.
1644                          */
1645                         if (page_count(rx_bi->page) > 2) {
1646                                 dma_unmap_page(rx_ring->dev,
1647                                                rx_bi->page_dma,
1648                                                PAGE_SIZE,
1649                                                DMA_FROM_DEVICE);
1650                                 rx_bi->page = NULL;
1651                                 rx_bi->page_dma = 0;
1652                                 rx_ring->rx_stats.realloc_count++;
1653                         } else {
1654                                 get_page(rx_bi->page);
1655                                 /* switch to the other half-page here; the
1656                                  * allocation code programs the right addr
1657                                  * into HW. If we haven't used this half-page,
1658                                  * the address won't be changed, and HW can
1659                                  * just use it next time through.
1660                                  */
1661                                 rx_bi->page_offset ^= PAGE_SIZE / 2;
1662                         }
1663
1664                 }
1665                 I40E_RX_INCREMENT(rx_ring, i);
1666
1667                 if (unlikely(
1668                     !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1669                         struct i40e_rx_buffer *next_buffer;
1670
1671                         next_buffer = &rx_ring->rx_bi[i];
1672                         next_buffer->skb = skb;
1673                         rx_ring->rx_stats.non_eop_descs++;
1674                         continue;
1675                 }
1676
1677                 /* ERR_MASK will only have valid bits if EOP set */
1678                 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1679                         dev_kfree_skb_any(skb);
1680                         continue;
1681                 }
1682
1683                 i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1684
1685                 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1686                         i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1687                                            I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1688                                            I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1689                         rx_ring->last_rx_timestamp = jiffies;
1690                 }
1691
1692                 /* probably a little skewed due to removing CRC */
1693                 total_rx_bytes += skb->len;
1694                 total_rx_packets++;
1695
1696                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1697
1698                 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1699
1700                 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1701                          ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1702                          : 0;
1703 #ifdef I40E_FCOE
1704                 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1705                         dev_kfree_skb_any(skb);
1706                         continue;
1707                 }
1708 #endif
1709                 i40e_receive_skb(rx_ring, skb, vlan_tag);
1710
1711                 rx_desc->wb.qword1.status_error_len = 0;
1712
1713         } while (likely(total_rx_packets < budget));
1714
1715         u64_stats_update_begin(&rx_ring->syncp);
1716         rx_ring->stats.packets += total_rx_packets;
1717         rx_ring->stats.bytes += total_rx_bytes;
1718         u64_stats_update_end(&rx_ring->syncp);
1719         rx_ring->q_vector->rx.total_packets += total_rx_packets;
1720         rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1721
1722         return failure ? budget : total_rx_packets;
1723 }
1724
1725 /**
1726  * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
1727  * @rx_ring:  rx ring to clean
1728  * @budget:   how many cleans we're allowed
1729  *
1730  * Returns number of packets cleaned
1731  **/
1732 static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
1733 {
1734         unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1735         u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1736         struct i40e_vsi *vsi = rx_ring->vsi;
1737         union i40e_rx_desc *rx_desc;
1738         u32 rx_error, rx_status;
1739         u16 rx_packet_len;
1740         bool failure = false;
1741         u8 rx_ptype;
1742         u64 qword;
1743         u16 i;
1744
1745         do {
1746                 struct i40e_rx_buffer *rx_bi;
1747                 struct sk_buff *skb;
1748                 u16 vlan_tag;
1749                 /* return some buffers to hardware, one at a time is too slow */
1750                 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1751                         failure = failure ||
1752                                   i40e_alloc_rx_buffers_1buf(rx_ring,
1753                                                              cleaned_count);
1754                         cleaned_count = 0;
1755                 }
1756
1757                 i = rx_ring->next_to_clean;
1758                 rx_desc = I40E_RX_DESC(rx_ring, i);
1759                 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1760                 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1761                         I40E_RXD_QW1_STATUS_SHIFT;
1762
1763                 if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
1764                         break;
1765
1766                 /* This memory barrier is needed to keep us from reading
1767                  * any other fields out of the rx_desc until we know the
1768                  * DD bit is set.
1769                  */
1770                 dma_rmb();
1771
1772                 if (i40e_rx_is_programming_status(qword)) {
1773                         i40e_clean_programming_status(rx_ring, rx_desc);
1774                         I40E_RX_INCREMENT(rx_ring, i);
1775                         continue;
1776                 }
1777                 rx_bi = &rx_ring->rx_bi[i];
1778                 skb = rx_bi->skb;
1779                 prefetch(skb->data);
1780
1781                 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1782                                 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1783
1784                 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1785                            I40E_RXD_QW1_ERROR_SHIFT;
1786                 rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
1787
1788                 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1789                            I40E_RXD_QW1_PTYPE_SHIFT;
1790                 rx_bi->skb = NULL;
1791                 cleaned_count++;
1792
1793                 /* Get the header and possibly the whole packet.
1794                  * If this is an skb from a previous receive, dma will be 0.
1795                  */
1796                 skb_put(skb, rx_packet_len);
1797                 dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
1798                                  DMA_FROM_DEVICE);
1799                 rx_bi->dma = 0;
1800
1801                 I40E_RX_INCREMENT(rx_ring, i);
1802
1803                 if (unlikely(
1804                     !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1805                         rx_ring->rx_stats.non_eop_descs++;
1806                         continue;
1807                 }
1808
1809                 /* ERR_MASK will only have valid bits if EOP set */
1810                 if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1811                         dev_kfree_skb_any(skb);
1812                         continue;
1813                 }
1814
1815                 i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
1816                 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1817                         i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1818                                            I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1819                                            I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1820                         rx_ring->last_rx_timestamp = jiffies;
1821                 }
1822
1823                 /* probably a little skewed due to removing CRC */
1824                 total_rx_bytes += skb->len;
1825                 total_rx_packets++;
1826
1827                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1828
1829                 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1830
1831                 vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1832                          ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1833                          : 0;
1834 #ifdef I40E_FCOE
1835                 if (!i40e_fcoe_handle_offload(rx_ring, rx_desc, skb)) {
1836                         dev_kfree_skb_any(skb);
1837                         continue;
1838                 }
1839 #endif
1840                 i40e_receive_skb(rx_ring, skb, vlan_tag);
1841
1842                 rx_desc->wb.qword1.status_error_len = 0;
1843         } while (likely(total_rx_packets < budget));
1844
1845         u64_stats_update_begin(&rx_ring->syncp);
1846         rx_ring->stats.packets += total_rx_packets;
1847         rx_ring->stats.bytes += total_rx_bytes;
1848         u64_stats_update_end(&rx_ring->syncp);
1849         rx_ring->q_vector->rx.total_packets += total_rx_packets;
1850         rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1851
1852         return failure ? budget : total_rx_packets;
1853 }
1854
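     /* Build a PFINT_DYN_CTLN value that re-enables the vector's interrupt
      * and, when @type is not I40E_ITR_NONE, also selects which ITR index to
      * reprogram with @itr.  The interval is in register units of 2 usecs;
      * for example, assuming the usual I40E_ITR_20K value of 0x0019, the
      * hardware waits 25 * 2us = 50us between interrupts (~20K ints/sec).
      */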
1855 static u32 i40e_buildreg_itr(const int type, const u16 itr)
1856 {
1857         u32 val;
1858
1859         val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
1860               /* Don't clear PBA because that can cause lost interrupts that
1861                * came in while we were cleaning/polling
1862                */
1863               (type << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT) |
1864               (itr << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT);
1865
1866         return val;
1867 }
1868
1869 /* a small macro to shorten up some long lines */
1870 #define INTREG I40E_PFINT_DYN_CTLN
1871
1872 /**
1873  * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt
1874  * @vsi: the VSI we care about
1875  * @q_vector: q_vector for which itr is being updated and interrupt enabled
1876  *
1877  **/
1878 static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
1879                                           struct i40e_q_vector *q_vector)
1880 {
1881         struct i40e_hw *hw = &vsi->back->hw;
1882         bool rx = false, tx = false;
1883         u32 rxval, txval;
1884         int vector;
1885         int idx = q_vector->v_idx;
1886
1887         vector = (q_vector->v_idx + vsi->base_vector);
1888
1889         /* avoid dynamic calculation if in countdown mode OR if
1890          * all dynamic is disabled
1891          */
1892         rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0);
1893
1894         if (q_vector->itr_countdown > 0 ||
1895             (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) &&
1896              !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) {
1897                 goto enable_int;
1898         }
1899
1900         if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) {
1901                 rx = i40e_set_new_dynamic_itr(&q_vector->rx);
1902                 rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
1903         }
1904
1905         if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) {
1906                 tx = i40e_set_new_dynamic_itr(&q_vector->tx);
1907                 txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr);
1908         }
1909
1910         if (rx || tx) {
1911                 /* get the higher of the two ITR adjustments and
1912                  * use the same value for both ITR registers
1913                  * when in adaptive mode (Rx and/or Tx)
1914                  */
1915                 u16 itr = max(q_vector->tx.itr, q_vector->rx.itr);
1916
1917                 q_vector->tx.itr = q_vector->rx.itr = itr;
1918                 txval = i40e_buildreg_itr(I40E_TX_ITR, itr);
1919                 tx = true;
1920                 rxval = i40e_buildreg_itr(I40E_RX_ITR, itr);
1921                 rx = true;
1922         }
1923
1924         /* only need to enable the interrupt once, but need
1925          * to possibly update both ITR values
1926          */
1927         if (rx) {
1928                 /* set the INTENA_MSK_MASK so that this first write
1929                  * won't actually enable the interrupt, instead just
1930                  * updating the ITR (bit 31 on both PF and VF)
1931                  */
1932                 rxval |= BIT(31);
1933                 /* don't check _DOWN because interrupt isn't being enabled */
1934                 wr32(hw, INTREG(vector - 1), rxval);
1935         }
1936
1937 enable_int:
1938         if (!test_bit(__I40E_DOWN, &vsi->state))
1939                 wr32(hw, INTREG(vector - 1), txval);
1940
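             /* the countdown keeps dynamic ITR from being recalculated on
              * every interrupt: after each update it is re-armed so the new
              * interval gets a few interrupts to settle before being
              * measured again
              */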
1941         if (q_vector->itr_countdown)
1942                 q_vector->itr_countdown--;
1943         else
1944                 q_vector->itr_countdown = ITR_COUNTDOWN_START;
1945 }
1946
1947 /**
1948  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1949  * @napi: napi struct with our devices info in it
1950  * @budget: amount of work driver is allowed to do this pass, in packets
1951  *
1952  * This function will clean all queues associated with a q_vector.
1953  *
1954  * Returns the amount of work done
1955  **/
1956 int i40e_napi_poll(struct napi_struct *napi, int budget)
1957 {
1958         struct i40e_q_vector *q_vector =
1959                                container_of(napi, struct i40e_q_vector, napi);
1960         struct i40e_vsi *vsi = q_vector->vsi;
1961         struct i40e_ring *ring;
1962         bool clean_complete = true;
1963         bool arm_wb = false;
1964         int budget_per_ring;
1965         int work_done = 0;
1966
1967         if (test_bit(__I40E_DOWN, &vsi->state)) {
1968                 napi_complete(napi);
1969                 return 0;
1970         }
1971
1972         /* Clear hung_detected bit */
1973         clear_bit(I40E_Q_VECTOR_HUNG_DETECT, &q_vector->hung_detected);
1974         /* Since the actual Tx work is minimal, we can give the Tx a larger
1975          * budget and be more aggressive about cleaning up the Tx descriptors.
1976          */
1977         i40e_for_each_ring(ring, q_vector->tx) {
1978                 clean_complete = clean_complete &&
1979                                  i40e_clean_tx_irq(ring, vsi->work_limit);
1980                 arm_wb = arm_wb || ring->arm_wb;
1981                 ring->arm_wb = false;
1982         }
1983
1984         /* Handle case where we are called by netpoll with a budget of 0 */
1985         if (budget <= 0)
1986                 goto tx_only;
1987
1988         /* We attempt to distribute budget to each Rx queue fairly, but don't
1989          * allow the budget to go below 1 because that would exit polling early.
1990          */
1991         budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
1992
1993         i40e_for_each_ring(ring, q_vector->rx) {
1994                 int cleaned;
1995
1996                 if (ring_is_ps_enabled(ring))
1997                         cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
1998                 else
1999                         cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
2000
2001                 work_done += cleaned;
2002                 /* if we didn't clean as many as budgeted, we must be done */
2003                 clean_complete = clean_complete && (budget_per_ring > cleaned);
2004         }
2005
2006         /* If work not completed, return budget and polling will return */
2007         if (!clean_complete) {
2008 tx_only:
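                     /* Rx/Tx work remains (or this is a zero-budget netpoll
                      * call); if any Tx ring requested it, ask the hardware
                      * for a forced descriptor writeback via WB_ON_ITR so Tx
                      * cleanup can make progress before the next poll
                      */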
2009                 if (arm_wb) {
2010                         q_vector->tx.ring[0].tx_stats.tx_force_wb++;
2011                         i40e_enable_wb_on_itr(vsi, q_vector);
2012                 }
2013                 return budget;
2014         }
2015
2016         if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
2017                 q_vector->arm_wb_state = false;
2018
2019         /* Work is done so exit the polling mode and re-enable the interrupt */
2020         napi_complete_done(napi, work_done);
2021         if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
2022                 i40e_update_enable_itr(vsi, q_vector);
2023         } else { /* Legacy mode */
2024                 i40e_irq_dynamic_enable_icr0(vsi->back, false);
2025         }
2026         return 0;
2027 }
2028
2029 /**
2030  * i40e_atr - Add a Flow Director ATR filter
2031  * @tx_ring:  ring to add programming descriptor to
2032  * @skb:      send buffer
2033  * @tx_flags: send tx flags
2034  **/
2035 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
2036                      u32 tx_flags)
2037 {
2038         struct i40e_filter_program_desc *fdir_desc;
2039         struct i40e_pf *pf = tx_ring->vsi->back;
2040         union {
2041                 unsigned char *network;
2042                 struct iphdr *ipv4;
2043                 struct ipv6hdr *ipv6;
2044         } hdr;
2045         struct tcphdr *th;
2046         unsigned int hlen;
2047         u32 flex_ptype, dtype_cmd;
2048         int l4_proto;
2049         u16 i;
2050
2051         /* make sure ATR is enabled */
2052         if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
2053                 return;
2054
2055         if ((pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2056                 return;
2057
2058         /* if sampling is disabled do nothing */
2059         if (!tx_ring->atr_sample_rate)
2060                 return;
2061
2062         /* Currently only IPv4/IPv6 with TCP is supported */
2063         if (!(tx_flags & (I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6)))
2064                 return;
2065
2066         /* snag network header to get L4 type and address */
2067         hdr.network = (tx_flags & I40E_TX_FLAGS_UDP_TUNNEL) ?
2068                       skb_inner_network_header(skb) : skb_network_header(skb);
2069
2070         /* Note: tx_flags gets modified to reflect inner protocols in
2071          * tx_enable_csum function if encap is enabled.
2072          */
2073         if (tx_flags & I40E_TX_FLAGS_IPV4) {
2074                 /* access ihl as u8 to avoid unaligned access on ia64 */
2075                 hlen = (hdr.network[0] & 0x0F) << 2;
2076                 l4_proto = hdr.ipv4->protocol;
2077         } else {
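                     /* ipv6_find_hdr() takes and returns offsets from
                      * skb->data, so seed hlen with the network header offset
                      * and convert the result back to a length relative to
                      * hdr.network afterwards
                      */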
2078                 hlen = hdr.network - skb->data;
2079                 l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
2080                 hlen -= hdr.network - skb->data;
2081         }
2082
2083         if (l4_proto != IPPROTO_TCP)
2084                 return;
2085
2086         th = (struct tcphdr *)(hdr.network + hlen);
2087
2088         /* Due to lack of space, no more new filters can be programmed */
2089         if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
2090                 return;
2091         if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2092             (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE))) {
2093                 /* HW ATR eviction will take care of removing filters on FIN
2094                  * and RST packets.
2095                  */
2096                 if (th->fin || th->rst)
2097                         return;
2098         }
2099
2100         tx_ring->atr_count++;
2101
2102         /* sample on all syn/fin/rst packets or once every atr sample rate */
2103         if (!th->fin &&
2104             !th->syn &&
2105             !th->rst &&
2106             (tx_ring->atr_count < tx_ring->atr_sample_rate))
2107                 return;
2108
2109         tx_ring->atr_count = 0;
2110
2111         /* grab the next descriptor */
2112         i = tx_ring->next_to_use;
2113         fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
2114
2115         i++;
2116         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2117
2118         flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
2119                       I40E_TXD_FLTR_QW0_QINDEX_MASK;
2120         flex_ptype |= (tx_flags & I40E_TX_FLAGS_IPV4) ?
2121                       (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
2122                        I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
2123                       (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
2124                        I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
2125
2126         flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
2127
2128         dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
2129
2130         dtype_cmd |= (th->fin || th->rst) ?
2131                      (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
2132                       I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
2133                      (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
2134                       I40E_TXD_FLTR_QW1_PCMD_SHIFT);
2135
2136         dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
2137                      I40E_TXD_FLTR_QW1_DEST_SHIFT;
2138
2139         dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
2140                      I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
2141
2142         dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
2143         if (!(tx_flags & I40E_TX_FLAGS_UDP_TUNNEL))
2144                 dtype_cmd |=
2145                         ((u32)I40E_FD_ATR_STAT_IDX(pf->hw.pf_id) <<
2146                         I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2147                         I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2148         else
2149                 dtype_cmd |=
2150                         ((u32)I40E_FD_ATR_TUNNEL_STAT_IDX(pf->hw.pf_id) <<
2151                         I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
2152                         I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
2153
2154         if ((pf->flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE) &&
2155             (!(pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE)))
2156                 dtype_cmd |= I40E_TXD_FLTR_QW1_ATR_MASK;
2157
2158         fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
2159         fdir_desc->rsvd = cpu_to_le32(0);
2160         fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
2161         fdir_desc->fd_id = cpu_to_le32(0);
2162 }
2163
2164 /**
2165  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
2166  * @skb:     send buffer
2167  * @tx_ring: ring to send buffer on
2168  * @flags:   the tx flags to be set
2169  *
2170  * Checks the skb and set up correspondingly several generic transmit flags
2171  * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
2172  *
2173  * Returns an error code to indicate the frame should be dropped upon error,
2174  * otherwise returns 0 to indicate the flags have been set properly.
2175  **/
2176 #ifdef I40E_FCOE
2177 inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2178                                       struct i40e_ring *tx_ring,
2179                                       u32 *flags)
2180 #else
2181 static inline int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
2182                                              struct i40e_ring *tx_ring,
2183                                              u32 *flags)
2184 #endif
2185 {
2186         __be16 protocol = skb->protocol;
2187         u32  tx_flags = 0;
2188
2189         if (protocol == htons(ETH_P_8021Q) &&
2190             !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
2191                 /* When HW VLAN acceleration is turned off by the user the
2192                  * stack sets the protocol to 8021q so that the driver
2193                  * can take any steps required to support the SW only
2194                  * VLAN handling.  In our case the driver doesn't need
2195                  * to take any further steps so just set the protocol
2196                  * to the encapsulated ethertype.
2197                  */
2198                 skb->protocol = vlan_get_protocol(skb);
2199                 goto out;
2200         }
2201
2202         /* if we have a HW VLAN tag being added, default to the HW one */
2203         if (skb_vlan_tag_present(skb)) {
2204                 tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
2205                 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2206         /* else if it is a SW VLAN, check the next protocol and store the tag */
2207         } else if (protocol == htons(ETH_P_8021Q)) {
2208                 struct vlan_hdr *vhdr, _vhdr;
2209
2210                 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
2211                 if (!vhdr)
2212                         return -EINVAL;
2213
2214                 protocol = vhdr->h_vlan_encapsulated_proto;
2215                 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
2216                 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
2217         }
2218
2219         if (!(tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED))
2220                 goto out;
2221
2222         /* Insert 802.1p priority into VLAN header */
2223         if ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
2224             (skb->priority != TC_PRIO_CONTROL)) {
2225                 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
2226                 tx_flags |= (skb->priority & 0x7) <<
2227                                 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
2228                 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
2229                         struct vlan_ethhdr *vhdr;
2230                         int rc;
2231
2232                         rc = skb_cow_head(skb, 0);
2233                         if (rc < 0)
2234                                 return rc;
2235                         vhdr = (struct vlan_ethhdr *)skb->data;
2236                         vhdr->h_vlan_TCI = htons(tx_flags >>
2237                                                  I40E_TX_FLAGS_VLAN_SHIFT);
2238                 } else {
2239                         tx_flags |= I40E_TX_FLAGS_HW_VLAN;
2240                 }
2241         }
2242
2243 out:
2244         *flags = tx_flags;
2245         return 0;
2246 }
2247
2248 /**
2249  * i40e_tso - set up the tso context descriptor
2250  * @tx_ring:  ptr to the ring to send
2251  * @skb:      ptr to the skb we're sending
2252  * @hdr_len:  ptr to the size of the packet header
2253  * @cd_type_cmd_tso_mss: Quad Word 1
2254  *
2255  * Returns 0 if no TSO is needed, 1 if TSO is set up, or a negative error code
2256  **/
2257 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
2258                     u8 *hdr_len, u64 *cd_type_cmd_tso_mss)
2259 {
2260         u64 cd_cmd, cd_tso_len, cd_mss;
2261         union {
2262                 struct iphdr *v4;
2263                 struct ipv6hdr *v6;
2264                 unsigned char *hdr;
2265         } ip;
2266         union {
2267                 struct tcphdr *tcp;
2268                 struct udphdr *udp;
2269                 unsigned char *hdr;
2270         } l4;
2271         u32 paylen, l4_offset;
2272         int err;
2273
2274         if (skb->ip_summed != CHECKSUM_PARTIAL)
2275                 return 0;
2276
2277         if (!skb_is_gso(skb))
2278                 return 0;
2279
2280         err = skb_cow_head(skb, 0);
2281         if (err < 0)
2282                 return err;
2283
2284         ip.hdr = skb_network_header(skb);
2285         l4.hdr = skb_transport_header(skb);
2286
2287         /* initialize outer IP header fields */
2288         if (ip.v4->version == 4) {
2289                 ip.v4->tot_len = 0;
2290                 ip.v4->check = 0;
2291         } else {
2292                 ip.v6->payload_len = 0;
2293         }
2294
2295         if (skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL | SKB_GSO_GRE |
2296                                          SKB_GSO_UDP_TUNNEL_CSUM)) {
2297                 if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
2298                         /* determine offset of outer transport header */
2299                         l4_offset = l4.hdr - skb->data;
2300
2301                         /* remove payload length from outer checksum */
2302                         paylen = (__force u16)l4.udp->check;
2303                         paylen += ntohs(1) * (u16)~(skb->len - l4_offset);
2304                         l4.udp->check = ~csum_fold((__force __wsum)paylen);
2305                 }
2306
2307                 /* reset pointers to inner headers */
2308                 ip.hdr = skb_inner_network_header(skb);
2309                 l4.hdr = skb_inner_transport_header(skb);
2310
2311                 /* initialize inner IP header fields */
2312                 if (ip.v4->version == 4) {
2313                         ip.v4->tot_len = 0;
2314                         ip.v4->check = 0;
2315                 } else {
2316                         ip.v6->payload_len = 0;
2317                 }
2318         }
2319
2320         /* determine offset of inner transport header */
2321         l4_offset = l4.hdr - skb->data;
2322
2323         /* remove payload length from inner checksum */
2324         paylen = (__force u16)l4.tcp->check;
2325         paylen += ntohs(1) * (u16)~(skb->len - l4_offset);
2326         l4.tcp->check = ~csum_fold((__force __wsum)paylen);
2327
2328         /* compute length of segmentation header */
2329         *hdr_len = (l4.tcp->doff * 4) + l4_offset;
2330
2331         /* find the field values */
2332         cd_cmd = I40E_TX_CTX_DESC_TSO;
2333         cd_tso_len = skb->len - *hdr_len;
2334         cd_mss = skb_shinfo(skb)->gso_size;
2335         *cd_type_cmd_tso_mss |= (cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
2336                                 (cd_tso_len << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
2337                                 (cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
2338         return 1;
2339 }
2340
2341 /**
2342  * i40e_tsyn - set up the tsyn context descriptor
2343  * @tx_ring:  ptr to the ring to send
2344  * @skb:      ptr to the skb we're sending
2345  * @tx_flags: the collected send information
2346  * @cd_type_cmd_tso_mss: Quad Word 1
2347  *
2348  * Returns 0 if no Tx timestamp can happen and 1 if the timestamp will happen
2349  **/
2350 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
2351                      u32 tx_flags, u64 *cd_type_cmd_tso_mss)
2352 {
2353         struct i40e_pf *pf;
2354
2355         if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
2356                 return 0;
2357
2358         /* Tx timestamps cannot be sampled when doing TSO */
2359         if (tx_flags & I40E_TX_FLAGS_TSO)
2360                 return 0;
2361
2362         /* only timestamp the outbound packet if the user has requested it and
2363          * we are not already transmitting a packet to be timestamped
2364          */
2365         pf = i40e_netdev_to_pf(tx_ring->netdev);
2366         if (!(pf->flags & I40E_FLAG_PTP))
2367                 return 0;
2368
2369         if (pf->ptp_tx &&
2370             !test_and_set_bit_lock(__I40E_PTP_TX_IN_PROGRESS, &pf->state)) {
2371                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2372                 pf->ptp_tx_skb = skb_get(skb);
2373         } else {
2374                 return 0;
2375         }
2376
2377         *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
2378                                 I40E_TXD_CTX_QW1_CMD_SHIFT;
2379
2380         return 1;
2381 }
2382
2383 /**
2384  * i40e_tx_enable_csum - Enable Tx checksum offloads
2385  * @skb: send buffer
2386  * @tx_flags: pointer to Tx flags currently set
2387  * @td_cmd: Tx descriptor command bits to set
2388  * @td_offset: Tx descriptor header offsets to set
2389  * @tx_ring: Tx descriptor ring
2390  * @cd_tunneling: ptr to context desc bits
2391  **/
2392 static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
2393                                u32 *td_cmd, u32 *td_offset,
2394                                struct i40e_ring *tx_ring,
2395                                u32 *cd_tunneling)
2396 {
2397         union {
2398                 struct iphdr *v4;
2399                 struct ipv6hdr *v6;
2400                 unsigned char *hdr;
2401         } ip;
2402         union {
2403                 struct tcphdr *tcp;
2404                 struct udphdr *udp;
2405                 unsigned char *hdr;
2406         } l4;
2407         unsigned char *exthdr;
2408         u32 offset, cmd = 0, tunnel = 0;
2409         __be16 frag_off;
2410         u8 l4_proto = 0;
2411
2412         if (skb->ip_summed != CHECKSUM_PARTIAL)
2413                 return 0;
2414
2415         ip.hdr = skb_network_header(skb);
2416         l4.hdr = skb_transport_header(skb);
2417
2418         /* compute outer L2 header size */
2419         offset = ((ip.hdr - skb->data) / 2) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
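             /* the descriptor length fields use scaled units: MACLEN is in
              * 2-byte words and the IPLEN/L4LEN values set below are in
              * 4-byte words, so e.g. an untagged Ethernet header gives
              * 14 / 2 = 7, a bare IPv4 header 20 / 4 = 5, and a minimal TCP
              * header doff = 5
              */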
2420
2421         if (skb->encapsulation) {
2422                 /* define outer network header type */
2423                 if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2424                         tunnel |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2425                                   I40E_TX_CTX_EXT_IP_IPV4 :
2426                                   I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
2427
2428                         l4_proto = ip.v4->protocol;
2429                 } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2430                         tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
2431
2432                         exthdr = ip.hdr + sizeof(*ip.v6);
2433                         l4_proto = ip.v6->nexthdr;
2434                         if (l4.hdr != exthdr)
2435                                 ipv6_skip_exthdr(skb, exthdr - skb->data,
2436                                                  &l4_proto, &frag_off);
2437                 }
2438
2439                 /* compute outer L3 header size */
2440                 tunnel |= ((l4.hdr - ip.hdr) / 4) <<
2441                           I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT;
2442
2443                 /* switch IP header pointer from outer to inner header */
2444                 ip.hdr = skb_inner_network_header(skb);
2445
2446                 /* define outer transport */
2447                 switch (l4_proto) {
2448                 case IPPROTO_UDP:
2449                         tunnel |= I40E_TXD_CTX_UDP_TUNNELING;
2450                         *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2451                         break;
2452                 case IPPROTO_GRE:
2453                         tunnel |= I40E_TXD_CTX_GRE_TUNNELING;
2454                         *tx_flags |= I40E_TX_FLAGS_UDP_TUNNEL;
2455                         break;
2456                 default:
2457                         if (*tx_flags & I40E_TX_FLAGS_TSO)
2458                                 return -1;
2459
2460                         skb_checksum_help(skb);
2461                         return 0;
2462                 }
2463
2464                 /* compute tunnel header size */
2465                 tunnel |= ((ip.hdr - l4.hdr) / 2) <<
2466                           I40E_TXD_CTX_QW0_NATLEN_SHIFT;
2467
2468                 /* indicate if we need to offload outer UDP header */
2469                 if ((*tx_flags & I40E_TX_FLAGS_TSO) &&
2470                     (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
2471                         tunnel |= I40E_TXD_CTX_QW0_L4T_CS_MASK;
2472
2473                 /* record tunnel offload values */
2474                 *cd_tunneling |= tunnel;
2475
2476                 /* switch L4 header pointer from outer to inner */
2477                 l4.hdr = skb_inner_transport_header(skb);
2478                 l4_proto = 0;
2479
2480                 /* reset type as we transition from outer to inner headers */
2481                 *tx_flags &= ~(I40E_TX_FLAGS_IPV4 | I40E_TX_FLAGS_IPV6);
2482                 if (ip.v4->version == 4)
2483                         *tx_flags |= I40E_TX_FLAGS_IPV4;
2484                 if (ip.v6->version == 6)
2485                         *tx_flags |= I40E_TX_FLAGS_IPV6;
2486         }
2487
2488         /* Enable IP checksum offloads */
2489         if (*tx_flags & I40E_TX_FLAGS_IPV4) {
2490                 l4_proto = ip.v4->protocol;
2491                 /* the stack computes the IP header already, the only time we
2492                  * need the hardware to recompute it is in the case of TSO.
2493                  */
2494                 cmd |= (*tx_flags & I40E_TX_FLAGS_TSO) ?
2495                        I40E_TX_DESC_CMD_IIPT_IPV4_CSUM :
2496                        I40E_TX_DESC_CMD_IIPT_IPV4;
2497         } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
2498                 cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
2499
2500                 exthdr = ip.hdr + sizeof(*ip.v6);
2501                 l4_proto = ip.v6->nexthdr;
2502                 if (l4.hdr != exthdr)
2503                         ipv6_skip_exthdr(skb, exthdr - skb->data,
2504                                          &l4_proto, &frag_off);
2505         }
2506
2507         /* compute inner L3 header size */
2508         offset |= ((l4.hdr - ip.hdr) / 4) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
2509
2510         /* Enable L4 checksum offloads */
2511         switch (l4_proto) {
2512         case IPPROTO_TCP:
2513                 /* enable checksum offloads */
2514                 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
2515                 offset |= l4.tcp->doff << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2516                 break;
2517         case IPPROTO_SCTP:
2518                 /* enable SCTP checksum offload */
2519                 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
2520                 offset |= (sizeof(struct sctphdr) >> 2) <<
2521                           I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2522                 break;
2523         case IPPROTO_UDP:
2524                 /* enable UDP checksum offload */
2525                 cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
2526                 offset |= (sizeof(struct udphdr) >> 2) <<
2527                           I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
2528                 break;
2529         default:
2530                 if (*tx_flags & I40E_TX_FLAGS_TSO)
2531                         return -1;
2532                 skb_checksum_help(skb);
2533                 return 0;
2534         }
2535
2536         *td_cmd |= cmd;
2537         *td_offset |= offset;
2538
2539         return 1;
2540 }
2541
2542 /**
2543  * i40e_create_tx_ctx - Build the Tx context descriptor
2544  * @tx_ring:  ring to create the descriptor on
2545  * @cd_type_cmd_tso_mss: Quad Word 1
2546  * @cd_tunneling: Quad Word 0 - bits 0-31
2547  * @cd_l2tag2: Quad Word 0 - bits 32-63
2548  **/
2549 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
2550                                const u64 cd_type_cmd_tso_mss,
2551                                const u32 cd_tunneling, const u32 cd_l2tag2)
2552 {
2553         struct i40e_tx_context_desc *context_desc;
2554         int i = tx_ring->next_to_use;
2555
2556         if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
2557             !cd_tunneling && !cd_l2tag2)
2558                 return;
2559
2560         /* grab the next descriptor */
2561         context_desc = I40E_TX_CTXTDESC(tx_ring, i);
2562
2563         i++;
2564         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
2565
2566         /* cpu_to_le32 and assign to struct fields */
2567         context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
2568         context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
2569         context_desc->rsvd = cpu_to_le16(0);
2570         context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
2571 }
2572
2573 /**
2574  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2575  * @tx_ring: the ring to be checked
2576  * @size:    the size buffer we want to assure is available
2577  *
2578  * Returns -EBUSY if a stop is needed, else 0
2579  **/
2580 int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2581 {
2582         netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2583         /* Memory barrier before checking head and tail */
2584         smp_mb();
2585
2586         /* Check again in a case another CPU has just made room available. */
2587         if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2588                 return -EBUSY;
2589
2590         /* A reprieve! - use start_queue because it doesn't call schedule */
2591         netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2592         ++tx_ring->tx_stats.restart_queue;
2593         return 0;
2594 }
2595
2596 /**
2597  * __i40e_chk_linearize - Check if there are more than 8 buffers per packet
2598  * @skb:      send buffer
2599  *
2600  * Note: Our HW can't DMA more than 8 buffers to build a packet on the wire
2601  * and so we need to figure out the cases where we need to linearize the skb.
2602  *
2603  * For TSO we need to count the TSO header and segment payload separately.
2604  * As such we need to check cases where we have 7 fragments or more as we
2605  * can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
2606  * the segment payload in the first descriptor, and another 7 for the
2607  * fragments.
2608  **/
2609 bool __i40e_chk_linearize(struct sk_buff *skb)
2610 {
2611         const struct skb_frag_struct *frag, *stale;
2612         int nr_frags, sum;
2613
2614         /* no need to check if number of frags is less than 7 */
2615         nr_frags = skb_shinfo(skb)->nr_frags;
2616         if (nr_frags < (I40E_MAX_BUFFER_TXD - 1))
2617                 return false;
2618
2619         /* We need to walk through the list and validate that each group
2620          * of 6 fragments totals at least gso_size.  However we don't need
2621          * to perform such validation on the last 6 since the last 6 cannot
2622          * inherit any data from a descriptor after them.
2623          */
2624         nr_frags -= I40E_MAX_BUFFER_TXD - 2;
2625         frag = &skb_shinfo(skb)->frags[0];
2626
2627         /* Initialize size to the negative value of gso_size minus 1.  We
2628          * use this as the worst case scenario in which the frag ahead
2629          * of us only provides one byte which is why we are limited to 6
2630          * descriptors for a single transmit as the header and previous
2631          * fragment are already consuming 2 descriptors.
2632          */
2633         sum = 1 - skb_shinfo(skb)->gso_size;
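             /* e.g. with a gso_size of 1448, any window of six consecutive
              * fragments that together carry fewer than 1448 bytes will drive
              * sum negative below and force a linearize
              */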
2634
2635         /* Add size of frags 0 through 4 to create our initial sum */
2636         sum += skb_frag_size(frag++);
2637         sum += skb_frag_size(frag++);
2638         sum += skb_frag_size(frag++);
2639         sum += skb_frag_size(frag++);
2640         sum += skb_frag_size(frag++);
2641
2642         /* Walk through fragments adding latest fragment, testing it, and
2643          * then removing stale fragments from the sum.
2644          */
2645         stale = &skb_shinfo(skb)->frags[0];
2646         for (;;) {
2647                 sum += skb_frag_size(frag++);
2648
2649                 /* if sum is negative we failed to make sufficient progress */
2650                 if (sum < 0)
2651                         return true;
2652
2653                 /* use pre-decrement to avoid processing last fragment */
2654                 if (!--nr_frags)
2655                         break;
2656
2657                 sum -= skb_frag_size(stale++);
2658         }
2659
2660         return false;
2661 }
2662
2663 /**
2664  * i40e_tx_map - Build the Tx descriptor
2665  * @tx_ring:  ring to send buffer on
2666  * @skb:      send buffer
2667  * @first:    first buffer info buffer to use
2668  * @tx_flags: collected send information
2669  * @hdr_len:  size of the packet header
2670  * @td_cmd:   the command field in the descriptor
2671  * @td_offset: offset for checksum or crc
2672  **/
2673 #ifdef I40E_FCOE
2674 inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2675                         struct i40e_tx_buffer *first, u32 tx_flags,
2676                         const u8 hdr_len, u32 td_cmd, u32 td_offset)
2677 #else
2678 static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
2679                                struct i40e_tx_buffer *first, u32 tx_flags,
2680                                const u8 hdr_len, u32 td_cmd, u32 td_offset)
2681 #endif
2682 {
2683         unsigned int data_len = skb->data_len;
2684         unsigned int size = skb_headlen(skb);
2685         struct skb_frag_struct *frag;
2686         struct i40e_tx_buffer *tx_bi;
2687         struct i40e_tx_desc *tx_desc;
2688         u16 i = tx_ring->next_to_use;
2689         u32 td_tag = 0;
2690         dma_addr_t dma;
2691         u16 gso_segs;
2692         u16 desc_count = 0;
2693         bool tail_bump = true;
2694         bool do_rs = false;
2695
2696         if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
2697                 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
2698                 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
2699                          I40E_TX_FLAGS_VLAN_SHIFT;
2700         }
2701
2702         if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2703                 gso_segs = skb_shinfo(skb)->gso_segs;
2704         else
2705                 gso_segs = 1;
2706
2707         /* on-wire bytes: payload once plus one header copy per segment */
2708         first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2709         first->gso_segs = gso_segs;
2710         first->skb = skb;
2711         first->tx_flags = tx_flags;
2712
2713         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2714
2715         tx_desc = I40E_TX_DESC(tx_ring, i);
2716         tx_bi = first;
2717
2718         for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2719                 if (dma_mapping_error(tx_ring->dev, dma))
2720                         goto dma_error;
2721
2722                 /* record length, and DMA address */
2723                 dma_unmap_len_set(tx_bi, len, size);
2724                 dma_unmap_addr_set(tx_bi, dma, dma);
2725
2726                 tx_desc->buffer_addr = cpu_to_le64(dma);
2727
2728                 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2729                         tx_desc->cmd_type_offset_bsz =
2730                                 build_ctob(td_cmd, td_offset,
2731                                            I40E_MAX_DATA_PER_TXD, td_tag);
2732
2733                         tx_desc++;
2734                         i++;
2735                         desc_count++;
2736
2737                         if (i == tx_ring->count) {
2738                                 tx_desc = I40E_TX_DESC(tx_ring, 0);
2739                                 i = 0;
2740                         }
2741
2742                         dma += I40E_MAX_DATA_PER_TXD;
2743                         size -= I40E_MAX_DATA_PER_TXD;
2744
2745                         tx_desc->buffer_addr = cpu_to_le64(dma);
2746                 }
2747
2748                 if (likely(!data_len))
2749                         break;
2750
2751                 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2752                                                           size, td_tag);
2753
2754                 tx_desc++;
2755                 i++;
2756                 desc_count++;
2757
2758                 if (i == tx_ring->count) {
2759                         tx_desc = I40E_TX_DESC(tx_ring, 0);
2760                         i = 0;
2761                 }
2762
2763                 size = skb_frag_size(frag);
2764                 data_len -= size;
2765
2766                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2767                                        DMA_TO_DEVICE);
2768
2769                 tx_bi = &tx_ring->tx_bi[i];
2770         }
2771
2772         /* set next_to_watch value indicating a packet is present */
2773         first->next_to_watch = tx_desc;
2774
2775         i++;
2776         if (i == tx_ring->count)
2777                 i = 0;
2778
2779         tx_ring->next_to_use = i;
2780
2781         netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2782                                                  tx_ring->queue_index),
2783                                                  first->bytecount);
2784         i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2785
2786         /* Algorithm to optimize tail and RS bit setting:
2787          * if xmit_more is supported
2788          *      if xmit_more is true
2789          *              do not update tail and do not mark RS bit.
2790          *      if xmit_more is false and last xmit_more was false
2791          *              if every packet spanned less than 4 desc
2792          *                      then set RS bit on 4th packet and update tail
2793          *                      on every packet
2794          *              else
2795          *                      update tail and set RS bit on every packet.
2796          *      if xmit_more is false and last_xmit_more was true
2797          *              update tail and set RS bit.
2798          *
2799          * Optimization: wmb to be issued only in case of tail update.
2800          * Also optimize the Descriptor WB path for RS bit with the same
2801          * algorithm.
2802          *
2803          * Note: If there are fewer than 4 packets
2804          * pending and interrupts were disabled, the service task will
2805          * trigger a force WB.
2806          */
2807         if (skb->xmit_more &&
2808             !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2809                                                     tx_ring->queue_index))) {
2810                 tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2811                 tail_bump = false;
2812         } else if (!skb->xmit_more &&
2813                    !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev,
2814                                                        tx_ring->queue_index)) &&
2815                    (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
2816                    (tx_ring->packet_stride < WB_STRIDE) &&
2817                    (desc_count < WB_STRIDE)) {
2818                 tx_ring->packet_stride++;
2819         } else {
2820                 tx_ring->packet_stride = 0;
2821                 tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
2822                 do_rs = true;
2823         }
2824         if (do_rs)
2825                 tx_ring->packet_stride = 0;
2826
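        /* Net effect of the branches above (editorial summary):
         *   xmit_more set, queue running        -> no tail bump, EOP only
         *   xmit_more clear, short burst so far -> tail bump, EOP only
         *                                          (RS deferred, stride++)
         *   otherwise                            -> tail bump, EOP + RS
         */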
2827         tx_desc->cmd_type_offset_bsz =
2828                         build_ctob(td_cmd, td_offset, size, td_tag) |
2829                         cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
2830                                                   I40E_TX_DESC_CMD_EOP) <<
2831                                                   I40E_TXD_QW1_CMD_SHIFT);
2832
2833         /* no tail bump yet: prefetch the next descriptor instead */
2834         if (!tail_bump)
2835                 prefetchw(tx_desc + 1);
2836
2837         if (tail_bump) {
2838                 /* Force memory writes to complete before letting h/w
2839                  * know there are new descriptors to fetch.  (Only
2840                  * applicable for weak-ordered memory model archs,
2841                  * such as IA-64).
2842                  */
2843                 wmb();
2844                 writel(i, tx_ring->tail);
2845         }
2846
2847         return;
2848
2849 dma_error:
2850         dev_info(tx_ring->dev, "TX DMA map failed\n");
2851
2852         /* clear dma mappings for failed tx_bi map */
2853         for (;;) {
2854                 tx_bi = &tx_ring->tx_bi[i];
2855                 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2856                 if (tx_bi == first)
2857                         break;
2858                 if (i == 0)
2859                         i = tx_ring->count;
2860                 i--;
2861         }
2862
2863         tx_ring->next_to_use = i;
2864 }
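/* Editorial sketch of the chunking done by the inner
 * while (size > I40E_MAX_DATA_PER_TXD) loop in i40e_tx_map() above: a single
 * DMA-mapped buffer larger than the per-descriptor limit is spread across
 * several data descriptors.  struct sketch_desc and the max_per_txd parameter
 * are illustrative stand-ins, not driver definitions.  With a 16 KB limit, a
 * 40 KB buffer would come back as three descriptors of 16 KB, 16 KB and 8 KB.
 */
struct sketch_desc {
	u64 addr;
	u32 len;
};

static unsigned int sketch_split_buffer(u64 dma, u32 size, u32 max_per_txd,
					struct sketch_desc *out)
{
	unsigned int n = 0;

	while (size > max_per_txd) {
		out[n].addr = dma;
		out[n].len = max_per_txd;
		n++;
		dma += max_per_txd;
		size -= max_per_txd;
	}

	/* the remainder (or the whole buffer, if it fit) lands in the last one */
	out[n].addr = dma;
	out[n].len = size;

	return n + 1;
}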
2865
2866 /**
2867  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2868  * @skb:     send buffer
2869  * @tx_ring: ring to send buffer on
2870  *
2871  * Returns NETDEV_TX_OK if sent, else an error code
2872  **/
2873 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2874                                         struct i40e_ring *tx_ring)
2875 {
2876         u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2877         u32 cd_tunneling = 0, cd_l2tag2 = 0;
2878         struct i40e_tx_buffer *first;
2879         u32 td_offset = 0;
2880         u32 tx_flags = 0;
2881         __be16 protocol;
2882         u32 td_cmd = 0;
2883         u8 hdr_len = 0;
2884         int tso, count;
2885         int tsyn;
2886
2887         /* prefetch the data, we'll need it later */
2888         prefetch(skb->data);
2889
2890         count = i40e_xmit_descriptor_count(skb);
2891         if (i40e_chk_linearize(skb, count)) {
2892                 if (__skb_linearize(skb))
2893                         goto out_drop;
2894                 count = TXD_USE_COUNT(skb->len);
2895                 tx_ring->tx_stats.tx_linearize++;
2896         }
2897
2898         /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2899          *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2900          *       + 4 desc gap to avoid the cache line where head is,
2901          *       + 1 desc for context descriptor,
2902          * otherwise try next time
2903          */
2904         if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2905                 tx_ring->tx_stats.tx_busy++;
2906                 return NETDEV_TX_BUSY;
2907         }
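        /* Worked example for the check above (numbers illustrative): an skb
         * with a small linear head and two small fragments gives count = 3,
         * so 3 + 4 + 1 = 8 descriptors must be free before queueing.
         */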
2908
2909         /* prepare the xmit flags */
2910         if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2911                 goto out_drop;
2912
2913         /* obtain protocol of skb */
2914         protocol = vlan_get_protocol(skb);
2915
2916         /* record the location of the first descriptor for this packet */
2917         first = &tx_ring->tx_bi[tx_ring->next_to_use];
2918
2919         /* setup IPv4/IPv6 offloads */
2920         if (protocol == htons(ETH_P_IP))
2921                 tx_flags |= I40E_TX_FLAGS_IPV4;
2922         else if (protocol == htons(ETH_P_IPV6))
2923                 tx_flags |= I40E_TX_FLAGS_IPV6;
2924
2925         tso = i40e_tso(tx_ring, skb, &hdr_len, &cd_type_cmd_tso_mss);
2926
2927         if (tso < 0)
2928                 goto out_drop;
2929         else if (tso)
2930                 tx_flags |= I40E_TX_FLAGS_TSO;
2931
2932         /* Always offload the checksum, since it's in the data descriptor */
2933         tso = i40e_tx_enable_csum(skb, &tx_flags, &td_cmd, &td_offset,
2934                                   tx_ring, &cd_tunneling);
2935         if (tso < 0)
2936                 goto out_drop;
2937
2938         tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2939
2940         if (tsyn)
2941                 tx_flags |= I40E_TX_FLAGS_TSYN;
2942
2943         skb_tx_timestamp(skb);
2944
2945         /* always enable CRC insertion offload */
2946         td_cmd |= I40E_TX_DESC_CMD_ICRC;
2947
2948         i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2949                            cd_tunneling, cd_l2tag2);
2950
2951         /* Add Flow Director ATR if it's enabled.
2952          *
2953          * NOTE: this must always be directly before the data descriptor.
2954          */
2955         i40e_atr(tx_ring, skb, tx_flags);
2956
2957         i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2958                     td_cmd, td_offset);
2959
2960         return NETDEV_TX_OK;
2961
2962 out_drop:
2963         dev_kfree_skb_any(skb);
2964         return NETDEV_TX_OK;
2965 }
2966
2967 /**
2968  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2969  * @skb:    send buffer
2970  * @netdev: network interface device structure
2971  *
2972  * Returns NETDEV_TX_OK if sent, else an error code
2973  **/
2974 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2975 {
2976         struct i40e_netdev_priv *np = netdev_priv(netdev);
2977         struct i40e_vsi *vsi = np->vsi;
2978         struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2979
2980         /* hardware can't handle really short frames; hardware padding
2981          * only works beyond this point
2982          */
2983         if (skb_put_padto(skb, I40E_MIN_TX_LEN))
2984                 return NETDEV_TX_OK;
2985
2986         return i40e_xmit_frame_ring(skb, tx_ring);
2987 }