drivers/net/ethernet/intel/i40e/i40e_txrx.c
1 /*******************************************************************************
2  *
3  * Intel Ethernet Controller XL710 Family Linux Driver
4  * Copyright(c) 2013 - 2014 Intel Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms and conditions of the GNU General Public License,
8  * version 2, as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope it will be useful, but WITHOUT
11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13  * more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  * The full GNU General Public License is included in this distribution in
19  * the file called "COPYING".
20  *
21  * Contact Information:
22  * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
23  * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
24  *
25  ******************************************************************************/
26
27 #include "i40e.h"
28 #include "i40e_prototype.h"
29
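/* build_ctob - pack the command, offset, buffer size and L2 tag fields
 * into the cmd_type_offset_bsz quadword of a Tx data descriptor
 */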
30 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
31                                 u32 td_tag)
32 {
33         return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
34                            ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
35                            ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
36                            ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
37                            ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
38 }
39
40 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
41 /**
42  * i40e_program_fdir_filter - Program a Flow Director filter
43  * @fdir_data: Packet data that will be filter parameters
44  * @raw_packet: the pre-allocated packet buffer for FDir
45  * @pf: The pf pointer
46  * @add: True for add/update, False for remove
47  **/
48 int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
49                              struct i40e_pf *pf, bool add)
50 {
51         struct i40e_filter_program_desc *fdir_desc;
52         struct i40e_tx_buffer *tx_buf;
53         struct i40e_tx_desc *tx_desc;
54         struct i40e_ring *tx_ring;
55         unsigned int fpt, dcc;
56         struct i40e_vsi *vsi;
57         struct device *dev;
58         dma_addr_t dma;
59         u32 td_cmd = 0;
60         u16 i;
61
62         /* find existing FDIR VSI */
63         vsi = NULL;
64         for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
65                 if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
66                         vsi = pf->vsi[i];
67         if (!vsi)
68                 return -ENOENT;
69
70         tx_ring = vsi->tx_rings[0];
71         dev = tx_ring->dev;
72
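        /* A filter is programmed by queueing a filter-program descriptor
         * followed by a dummy data descriptor that carries the raw packet
         * template mapped below.
         */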
73         dma = dma_map_single(dev, raw_packet,
74                              I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
75         if (dma_mapping_error(dev, dma))
76                 goto dma_fail;
77
78         /* grab the next descriptor */
79         i = tx_ring->next_to_use;
80         fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
81
82         tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;
83
84         fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
85               I40E_TXD_FLTR_QW0_QINDEX_MASK;
86
87         fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) &
88                I40E_TXD_FLTR_QW0_FLEXOFF_MASK;
89
90         fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) &
91                I40E_TXD_FLTR_QW0_PCTYPE_MASK;
92
93         /* Use LAN VSI Id if not programmed by user */
94         if (fdir_data->dest_vsi == 0)
95                 fpt |= (pf->vsi[pf->lan_vsi]->id) <<
96                        I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
97         else
98                 fpt |= ((u32)fdir_data->dest_vsi <<
99                         I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) &
100                        I40E_TXD_FLTR_QW0_DEST_VSI_MASK;
101
102         fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt);
103
104         dcc = I40E_TX_DESC_DTYPE_FILTER_PROG;
105
106         if (add)
107                 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
108                        I40E_TXD_FLTR_QW1_PCMD_SHIFT;
109         else
110                 dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
111                        I40E_TXD_FLTR_QW1_PCMD_SHIFT;
112
113         dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) &
114                I40E_TXD_FLTR_QW1_DEST_MASK;
115
116         dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) &
117                I40E_TXD_FLTR_QW1_FD_STATUS_MASK;
118
119         if (fdir_data->cnt_index != 0) {
120                 dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
121                 dcc |= ((u32)fdir_data->cnt_index <<
122                         I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) &
123                        I40E_TXD_FLTR_QW1_CNTINDEX_MASK;
124         }
125
126         fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc);
127         fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
128
129         /* Now program a dummy descriptor */
130         i = tx_ring->next_to_use;
131         tx_desc = I40E_TX_DESC(tx_ring, i);
132         tx_buf = &tx_ring->tx_bi[i];
133
134         tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;
135
136         /* record length, and DMA address */
137         dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
138         dma_unmap_addr_set(tx_buf, dma, dma);
139
140         tx_desc->buffer_addr = cpu_to_le64(dma);
141         td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
142
143         tx_desc->cmd_type_offset_bsz =
144                 build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
145
146         /* set the timestamp */
147         tx_buf->time_stamp = jiffies;
148
149         /* Force memory writes to complete before letting h/w
150          * know there are new descriptors to fetch.  (Only
151          * applicable for weak-ordered memory model archs,
152          * such as IA-64).
153          */
154         wmb();
155
156         /* Mark the data descriptor to be watched */
157         tx_buf->next_to_watch = tx_desc;
158
159         writel(tx_ring->next_to_use, tx_ring->tail);
160         return 0;
161
162 dma_fail:
163         return -1;
164 }
165
166 #define IP_HEADER_OFFSET 14
167 #define I40E_UDPIP_DUMMY_PACKET_LEN 42
168 /**
169  * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
170  * @vsi: pointer to the targeted VSI
171  * @fd_data: the flow director data required for the FDir descriptor
172  * @raw_packet: the pre-allocated packet buffer for FDir
173  * @add: true adds a filter, false removes it
174  *
175  * Returns 0 if the filters were successfully added or removed
176  **/
177 static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
178                                    struct i40e_fdir_filter *fd_data,
179                                    u8 *raw_packet, bool add)
180 {
181         struct i40e_pf *pf = vsi->back;
182         struct udphdr *udp;
183         struct iphdr *ip;
184         bool err = false;
185         int ret;
186         int i;
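        /* Dummy UDP/IPv4 frame: zeroed MAC addresses, EtherType 0x0800,
         * minimal IPv4 header (version 4, IHL 5, total length 28, DF set,
         * TTL 64, protocol 17/UDP) followed by a zeroed UDP header.
         */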
187         static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
188                 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
189                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
190
191         memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
192
193         ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
194         udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
195               + sizeof(struct iphdr));
196
197         ip->daddr = fd_data->dst_ip[0];
198         udp->dest = fd_data->dst_port;
199         ip->saddr = fd_data->src_ip[0];
200         udp->source = fd_data->src_port;
201
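        /* program the same filter for every UDPv4 PCTYPE in the range */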
202         for (i = I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP;
203              i <= I40E_FILTER_PCTYPE_NONF_IPV4_UDP; i++) {
204                 fd_data->pctype = i;
205                 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
206
207                 if (ret) {
208                         dev_info(&pf->pdev->dev,
209                                  "Filter command send failed for PCTYPE %d (ret = %d)\n",
210                                  fd_data->pctype, ret);
211                         err = true;
212                 } else {
213                         dev_info(&pf->pdev->dev,
214                                  "Filter OK for PCTYPE %d (ret = %d)\n",
215                                  fd_data->pctype, ret);
216                 }
217         }
218
219         return err ? -EOPNOTSUPP : 0;
220 }
221
222 #define I40E_TCPIP_DUMMY_PACKET_LEN 54
223 /**
224  * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
225  * @vsi: pointer to the targeted VSI
226  * @fd_data: the flow director data required for the FDir descriptor
227  * @raw_packet: the pre-allocated packet buffer for FDir
228  * @add: true adds a filter, false removes it
229  *
230  * Returns 0 if the filters were successfully added or removed
231  **/
232 static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
233                                    struct i40e_fdir_filter *fd_data,
234                                    u8 *raw_packet, bool add)
235 {
236         struct i40e_pf *pf = vsi->back;
237         struct tcphdr *tcp;
238         struct iphdr *ip;
239         bool err = false;
240         int ret;
241         /* Dummy packet */
242         static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
243                 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
244                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
245                 0x0, 0x72, 0, 0, 0, 0};
246
247         memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
248
249         ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
250         tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
251               + sizeof(struct iphdr));
252
253         ip->daddr = fd_data->dst_ip[0];
254         tcp->dest = fd_data->dst_port;
255         ip->saddr = fd_data->src_ip[0];
256         tcp->source = fd_data->src_port;
257
258         if (add) {
259                 if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
260                         dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
261                         pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
262                 }
263         }
264
265         fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN;
266         ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
267
268         if (ret) {
269                 dev_info(&pf->pdev->dev,
270                          "Filter command send failed for PCTYPE %d (ret = %d)\n",
271                          fd_data->pctype, ret);
272                 err = true;
273         } else {
274                 dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
275                          fd_data->pctype, ret);
276         }
277
278         fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
279
280         ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
281         if (ret) {
282                 dev_info(&pf->pdev->dev,
283                          "Filter command send failed for PCTYPE %d (ret = %d)\n",
284                          fd_data->pctype, ret);
285                 err = true;
286         } else {
287                 dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
288                           fd_data->pctype, ret);
289         }
290
291         return err ? -EOPNOTSUPP : 0;
292 }
293
294 /**
295  * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
296  * a specific flow spec
297  * @vsi: pointer to the targeted VSI
298  * @fd_data: the flow director data required for the FDir descriptor
299  * @raw_packet: the pre-allocated packet buffer for FDir
300  * @add: true adds a filter, false removes it
301  *
302  * Returns 0 if the filters were successfully added or removed
303  **/
304 static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
305                                     struct i40e_fdir_filter *fd_data,
306                                     u8 *raw_packet, bool add)
307 {
308         return -EOPNOTSUPP;
309 }
310
311 #define I40E_IP_DUMMY_PACKET_LEN 34
312 /**
313  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
314  * a specific flow spec
315  * @vsi: pointer to the targeted VSI
316  * @fd_data: the flow director data required for the FDir descriptor
317  * @raw_packet: the pre-allocated packet buffer for FDir
318  * @add: true adds a filter, false removes it
319  *
320  * Returns 0 if the filters were successfully added or removed
321  **/
322 static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
323                                   struct i40e_fdir_filter *fd_data,
324                                   u8 *raw_packet, bool add)
325 {
326         struct i40e_pf *pf = vsi->back;
327         struct iphdr *ip;
328         bool err = false;
329         int ret;
330         int i;
331         static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
332                 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
333                 0, 0, 0, 0};
334
335         memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
336         ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
337
338         ip->saddr = fd_data->src_ip[0];
339         ip->daddr = fd_data->dst_ip[0];
340         ip->protocol = 0;
341
342         for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
343              i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
344                 fd_data->pctype = i;
345                 ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
346
347                 if (ret) {
348                         dev_info(&pf->pdev->dev,
349                                  "Filter command send failed for PCTYPE %d (ret = %d)\n",
350                                  fd_data->pctype, ret);
351                         err = true;
352                 } else {
353                         dev_info(&pf->pdev->dev,
354                                  "Filter OK for PCTYPE %d (ret = %d)\n",
355                                  fd_data->pctype, ret);
356                 }
357         }
358
359         return err ? -EOPNOTSUPP : 0;
360 }
361
362 /**
363  * i40e_add_del_fdir - Build raw packets to add/del fdir filter
364  * @vsi: pointer to the targeted VSI
365  * @input: the flow director filter data to add or delete
366  * @add: true adds a filter, false removes it
367  *
368  **/
369 int i40e_add_del_fdir(struct i40e_vsi *vsi,
370                       struct i40e_fdir_filter *input, bool add)
371 {
372         struct i40e_pf *pf = vsi->back;
373         u8 *raw_packet;
374         int ret;
375
376         /* Allocate the raw packet buffer that the protocol-specific
377          * helpers below will populate with a dummy frame
378          */
379         raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
380         if (!raw_packet)
381                 return -ENOMEM;
382
383         switch (input->flow_type & ~FLOW_EXT) {
384         case TCP_V4_FLOW:
385                 ret = i40e_add_del_fdir_tcpv4(vsi, input, raw_packet,
386                                               add);
387                 break;
388         case UDP_V4_FLOW:
389                 ret = i40e_add_del_fdir_udpv4(vsi, input, raw_packet,
390                                               add);
391                 break;
392         case SCTP_V4_FLOW:
393                 ret = i40e_add_del_fdir_sctpv4(vsi, input, raw_packet,
394                                                add);
395                 break;
396         case IPV4_FLOW:
397                 ret = i40e_add_del_fdir_ipv4(vsi, input, raw_packet,
398                                              add);
399                 break;
400         case IP_USER_FLOW:
401                 switch (input->ip4_proto) {
402                 case IPPROTO_TCP:
403                         ret = i40e_add_del_fdir_tcpv4(vsi, input,
404                                                       raw_packet, add);
405                         break;
406                 case IPPROTO_UDP:
407                         ret = i40e_add_del_fdir_udpv4(vsi, input,
408                                                       raw_packet, add);
409                         break;
410                 case IPPROTO_SCTP:
411                         ret = i40e_add_del_fdir_sctpv4(vsi, input,
412                                                        raw_packet, add);
413                         break;
414                 default:
415                         ret = i40e_add_del_fdir_ipv4(vsi, input,
416                                                      raw_packet, add);
417                         break;
418                 }
419                 break;
420         default:
421                 dev_info(&pf->pdev->dev, "Could not specify spec type %d",
422                          input->flow_type);
423                 ret = -EINVAL;
424         }
425
426         kfree(raw_packet);
427         return ret;
428 }
429
430 /**
431  * i40e_fd_handle_status - check the Programming Status for FD
432  * @rx_ring: the Rx ring for this descriptor
433  * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
434  * @prog_id: the id originally used for programming
435  *
436  * This is used to verify whether the FD programming or invalidation
437  * requested by SW to the HW succeeded and to take action accordingly.
438  **/
439 static void i40e_fd_handle_status(struct i40e_ring *rx_ring,
440                                   union i40e_rx_desc *rx_desc, u8 prog_id)
441 {
442         struct i40e_pf *pf = rx_ring->vsi->back;
443         struct pci_dev *pdev = pf->pdev;
444         u32 fcnt_prog, fcnt_avail;
445         u32 error;
446         u64 qw;
447
448         qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
449         error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
450                 I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
451
452         if (error == (0x1 << I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
453                 dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
454                          rx_desc->wb.qword0.hi_dword.fd_id);
455
456                 /* filter programming failed most likely due to table full */
457                 fcnt_prog = i40e_get_current_fd_count(pf);
458                 fcnt_avail = pf->hw.fdir_shared_filter_count +
459                                                        pf->fdir_pf_filter_count;
460
461                 /* If ATR is running fcnt_prog can quickly change,
462                  * if we are very close to full, it makes sense to disable
463                  * FD ATR/SB and then re-enable it when there is room.
464                  */
465                 if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
466                         /* Turn off ATR first */
467                         if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
468                                 pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
469                                 dev_warn(&pdev->dev, "FD filter space full, ATR for further flows will be turned off\n");
470                                 pf->auto_disable_flags |=
471                                                        I40E_FLAG_FD_ATR_ENABLED;
472                                 pf->flags |= I40E_FLAG_FDIR_REQUIRES_REINIT;
473                         } else if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
474                                 pf->flags &= ~I40E_FLAG_FD_SB_ENABLED;
475                                 dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
476                                 pf->auto_disable_flags |=
477                                                         I40E_FLAG_FD_SB_ENABLED;
478                                 pf->flags |= I40E_FLAG_FDIR_REQUIRES_REINIT;
479                         }
480                 } else {
481                         dev_info(&pdev->dev, "FD filter programming error\n");
482                 }
483         } else if (error ==
484                           (0x1 << I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
485                 netdev_info(rx_ring->vsi->netdev, "ntuple filter loc = %d, could not be removed\n",
486                             rx_desc->wb.qword0.hi_dword.fd_id);
487         }
488 }
489
490 /**
491  * i40e_unmap_and_free_tx_resource - Release a Tx buffer
492  * @ring:      the ring that owns the buffer
493  * @tx_buffer: the buffer to free
494  **/
495 static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
496                                             struct i40e_tx_buffer *tx_buffer)
497 {
498         if (tx_buffer->skb) {
499                 dev_kfree_skb_any(tx_buffer->skb);
500                 if (dma_unmap_len(tx_buffer, len))
501                         dma_unmap_single(ring->dev,
502                                          dma_unmap_addr(tx_buffer, dma),
503                                          dma_unmap_len(tx_buffer, len),
504                                          DMA_TO_DEVICE);
505         } else if (dma_unmap_len(tx_buffer, len)) {
506                 dma_unmap_page(ring->dev,
507                                dma_unmap_addr(tx_buffer, dma),
508                                dma_unmap_len(tx_buffer, len),
509                                DMA_TO_DEVICE);
510         }
511         tx_buffer->next_to_watch = NULL;
512         tx_buffer->skb = NULL;
513         dma_unmap_len_set(tx_buffer, len, 0);
514         /* tx_buffer must be completely set up in the transmit path */
515 }
516
517 /**
518  * i40e_clean_tx_ring - Free any empty Tx buffers
519  * @tx_ring: ring to be cleaned
520  **/
521 void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
522 {
523         unsigned long bi_size;
524         u16 i;
525
526         /* ring already cleared, nothing to do */
527         if (!tx_ring->tx_bi)
528                 return;
529
530         /* Free all the Tx ring sk_buffs */
531         for (i = 0; i < tx_ring->count; i++)
532                 i40e_unmap_and_free_tx_resource(tx_ring, &tx_ring->tx_bi[i]);
533
534         bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
535         memset(tx_ring->tx_bi, 0, bi_size);
536
537         /* Zero out the descriptor ring */
538         memset(tx_ring->desc, 0, tx_ring->size);
539
540         tx_ring->next_to_use = 0;
541         tx_ring->next_to_clean = 0;
542
543         if (!tx_ring->netdev)
544                 return;
545
546         /* cleanup Tx queue statistics */
547         netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev,
548                                                   tx_ring->queue_index));
549 }
550
551 /**
552  * i40e_free_tx_resources - Free Tx resources per queue
553  * @tx_ring: Tx descriptor ring for a specific queue
554  *
555  * Free all transmit software resources
556  **/
557 void i40e_free_tx_resources(struct i40e_ring *tx_ring)
558 {
559         i40e_clean_tx_ring(tx_ring);
560         kfree(tx_ring->tx_bi);
561         tx_ring->tx_bi = NULL;
562
563         if (tx_ring->desc) {
564                 dma_free_coherent(tx_ring->dev, tx_ring->size,
565                                   tx_ring->desc, tx_ring->dma);
566                 tx_ring->desc = NULL;
567         }
568 }
569
570 /**
571  * i40e_get_tx_pending - how many tx descriptors not processed
572  * @ring: the ring of descriptors
573  *
574  * Since there is no access to the ring head register
575  * in XL710, we need to use our local copies
576  **/
577 static u32 i40e_get_tx_pending(struct i40e_ring *ring)
578 {
579         u32 ntu = ((ring->next_to_clean <= ring->next_to_use)
580                         ? ring->next_to_use
581                         : ring->next_to_use + ring->count);
582         return ntu - ring->next_to_clean;
583 }
584
585 /**
586  * i40e_check_tx_hang - Is there a hang in the Tx queue
587  * @tx_ring: the ring of descriptors
588  **/
589 static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
590 {
591         u32 tx_pending = i40e_get_tx_pending(tx_ring);
592         bool ret = false;
593
594         clear_check_for_tx_hang(tx_ring);
595
596         /* Check for a hung queue, but be thorough. This verifies
597          * that a transmit has been completed since the previous
598          * check AND there is at least one packet pending. The
599          * ARMED bit is set to indicate a potential hang. The
600          * bit is cleared if a pause frame is received to remove
601          * false hang detection due to PFC or 802.3x frames. By
602          * requiring this to fail twice we avoid races with
603          * PFC clearing the ARMED bit and conditions where we
604          * run the check_tx_hang logic with a transmit completion
605          * pending but without time to complete it yet.
606          */
607         if ((tx_ring->tx_stats.tx_done_old == tx_ring->stats.packets) &&
608             tx_pending) {
609                 /* make sure it is true for two checks in a row */
610                 ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
611                                        &tx_ring->state);
612         } else {
613                 /* update completed stats and disarm the hang check */
614                 tx_ring->tx_stats.tx_done_old = tx_ring->stats.packets;
615                 clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
616         }
617
618         return ret;
619 }
620
621 /**
622  * i40e_get_head - Retrieve head from head writeback
623  * @tx_ring:  tx ring to fetch head of
624  *
625  * Returns value of Tx ring head based on value stored
626  * in head write-back location
627  **/
628 static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
629 {
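        /* HW writes the head value into the u32 placed just past the last
         * descriptor (see i40e_setup_tx_descriptors).
         */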
630         void *head = (struct i40e_tx_desc *)tx_ring->desc + tx_ring->count;
631
632         return le32_to_cpu(*(volatile __le32 *)head);
633 }
634
635 /**
636  * i40e_clean_tx_irq - Reclaim resources after transmit completes
637  * @tx_ring:  tx ring to clean
638  * @budget:   how many cleans we're allowed
639  *
640  * Returns true if there's any budget left (i.e. the clean is finished)
641  **/
642 static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
643 {
644         u16 i = tx_ring->next_to_clean;
645         struct i40e_tx_buffer *tx_buf;
646         struct i40e_tx_desc *tx_head;
647         struct i40e_tx_desc *tx_desc;
648         unsigned int total_packets = 0;
649         unsigned int total_bytes = 0;
650
651         tx_buf = &tx_ring->tx_bi[i];
652         tx_desc = I40E_TX_DESC(tx_ring, i);
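        /* bias i by -count so the ring-wrap check in the loop below is a
         * simple test for !i; the count is added back before writing
         * next_to_clean.
         */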
653         i -= tx_ring->count;
654
655         tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
656
657         do {
658                 struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
659
660                 /* if next_to_watch is not set then there is no work pending */
661                 if (!eop_desc)
662                         break;
663
664                 /* prevent any other reads prior to eop_desc */
665                 read_barrier_depends();
666
667                 /* we have caught up to head, no work left to do */
668                 if (tx_head == tx_desc)
669                         break;
670
671                 /* clear next_to_watch to prevent false hangs */
672                 tx_buf->next_to_watch = NULL;
673
674                 /* update the statistics for this packet */
675                 total_bytes += tx_buf->bytecount;
676                 total_packets += tx_buf->gso_segs;
677
678                 /* free the skb */
679                 dev_kfree_skb_any(tx_buf->skb);
680
681                 /* unmap skb header data */
682                 dma_unmap_single(tx_ring->dev,
683                                  dma_unmap_addr(tx_buf, dma),
684                                  dma_unmap_len(tx_buf, len),
685                                  DMA_TO_DEVICE);
686
687                 /* clear tx_buffer data */
688                 tx_buf->skb = NULL;
689                 dma_unmap_len_set(tx_buf, len, 0);
690
691                 /* unmap remaining buffers */
692                 while (tx_desc != eop_desc) {
693
694                         tx_buf++;
695                         tx_desc++;
696                         i++;
697                         if (unlikely(!i)) {
698                                 i -= tx_ring->count;
699                                 tx_buf = tx_ring->tx_bi;
700                                 tx_desc = I40E_TX_DESC(tx_ring, 0);
701                         }
702
703                         /* unmap any remaining paged data */
704                         if (dma_unmap_len(tx_buf, len)) {
705                                 dma_unmap_page(tx_ring->dev,
706                                                dma_unmap_addr(tx_buf, dma),
707                                                dma_unmap_len(tx_buf, len),
708                                                DMA_TO_DEVICE);
709                                 dma_unmap_len_set(tx_buf, len, 0);
710                         }
711                 }
712
713                 /* move us one more past the eop_desc for start of next pkt */
714                 tx_buf++;
715                 tx_desc++;
716                 i++;
717                 if (unlikely(!i)) {
718                         i -= tx_ring->count;
719                         tx_buf = tx_ring->tx_bi;
720                         tx_desc = I40E_TX_DESC(tx_ring, 0);
721                 }
722
723                 /* update budget accounting */
724                 budget--;
725         } while (likely(budget));
726
727         i += tx_ring->count;
728         tx_ring->next_to_clean = i;
729         u64_stats_update_begin(&tx_ring->syncp);
730         tx_ring->stats.bytes += total_bytes;
731         tx_ring->stats.packets += total_packets;
732         u64_stats_update_end(&tx_ring->syncp);
733         tx_ring->q_vector->tx.total_bytes += total_bytes;
734         tx_ring->q_vector->tx.total_packets += total_packets;
735
736         if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
737                 /* schedule immediate reset if we believe we hung */
738                 dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
739                          "  VSI                  <%d>\n"
740                          "  Tx Queue             <%d>\n"
741                          "  next_to_use          <%x>\n"
742                          "  next_to_clean        <%x>\n",
743                          tx_ring->vsi->seid,
744                          tx_ring->queue_index,
745                          tx_ring->next_to_use, i);
746                 dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
747                          "  time_stamp           <%lx>\n"
748                          "  jiffies              <%lx>\n",
749                          tx_ring->tx_bi[i].time_stamp, jiffies);
750
751                 netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
752
753                 dev_info(tx_ring->dev,
754                          "tx hang detected on queue %d, resetting adapter\n",
755                          tx_ring->queue_index);
756
757                 tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
758
759                 /* the adapter is about to reset, no point in enabling stuff */
760                 return true;
761         }
762
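        /* report completed work to the stack's byte queue limits (BQL) */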
763         netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
764                                                       tx_ring->queue_index),
765                                   total_packets, total_bytes);
766
767 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
768         if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
769                      (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
770                 /* Make sure that anybody stopping the queue after this
771                  * sees the new next_to_clean.
772                  */
773                 smp_mb();
774                 if (__netif_subqueue_stopped(tx_ring->netdev,
775                                              tx_ring->queue_index) &&
776                    !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
777                         netif_wake_subqueue(tx_ring->netdev,
778                                             tx_ring->queue_index);
779                         ++tx_ring->tx_stats.restart_queue;
780                 }
781         }
782
783         return budget > 0;
784 }
785
786 /**
787  * i40e_set_new_dynamic_itr - Find new ITR level
788  * @rc: structure containing ring performance data
789  *
790  * Stores a new ITR value based on packets and byte counts during
791  * the last interrupt.  The advantage of per interrupt computation
792  * is faster updates and more accurate ITR for the current traffic
793  * pattern.  Constants in this function were computed based on
794  * theoretical maximum wire speed and thresholds were set based on
795  * testing data as well as attempting to minimize response time
796  * while increasing bulk throughput.
797  **/
798 static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
799 {
800         enum i40e_latency_range new_latency_range = rc->latency_range;
801         u32 new_itr = rc->itr;
802         int bytes_per_int;
803
804         if (rc->total_packets == 0 || !rc->itr)
805                 return;
806
807         /* simple throttlerate management
808          *   0-10MB/s   lowest (100000 ints/s)
809          *  10-20MB/s   low    (20000 ints/s)
810          *  20-1249MB/s bulk   (8000 ints/s)
811          */
812         bytes_per_int = rc->total_bytes / rc->itr;
813         switch (rc->latency_range) {
814         case I40E_LOWEST_LATENCY:
815                 if (bytes_per_int > 10)
816                         new_latency_range = I40E_LOW_LATENCY;
817                 break;
818         case I40E_LOW_LATENCY:
819                 if (bytes_per_int > 20)
820                         new_latency_range = I40E_BULK_LATENCY;
821                 else if (bytes_per_int <= 10)
822                         new_latency_range = I40E_LOWEST_LATENCY;
823                 break;
824         case I40E_BULK_LATENCY:
825                 if (bytes_per_int <= 20)
826                         new_latency_range = I40E_LOW_LATENCY;
827                 break;
828         }
            rc->latency_range = new_latency_range;
829
830         switch (new_latency_range) {
831         case I40E_LOWEST_LATENCY:
832                 new_itr = I40E_ITR_100K;
833                 break;
834         case I40E_LOW_LATENCY:
835                 new_itr = I40E_ITR_20K;
836                 break;
837         case I40E_BULK_LATENCY:
838                 new_itr = I40E_ITR_8K;
839                 break;
840         default:
841                 break;
842         }
843
844         if (new_itr != rc->itr) {
845                 /* do an exponential smoothing */
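                /* 10*new*old / (9*new + old) is a weighted harmonic mean that
                 * gives the previous ITR nine times the weight of the target,
                 * so the interrupt rate changes gradually.
                 */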
846                 new_itr = (10 * new_itr * rc->itr) /
847                           ((9 * new_itr) + rc->itr);
848                 rc->itr = new_itr & I40E_MAX_ITR;
849         }
850
851         rc->total_bytes = 0;
852         rc->total_packets = 0;
853 }
854
855 /**
856  * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
857  * @q_vector: the vector to adjust
858  **/
859 static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
860 {
861         u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
862         struct i40e_hw *hw = &q_vector->vsi->back->hw;
863         u32 reg_addr;
864         u16 old_itr;
865
866         reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
867         old_itr = q_vector->rx.itr;
868         i40e_set_new_dynamic_itr(&q_vector->rx);
869         if (old_itr != q_vector->rx.itr)
870                 wr32(hw, reg_addr, q_vector->rx.itr);
871
872         reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
873         old_itr = q_vector->tx.itr;
874         i40e_set_new_dynamic_itr(&q_vector->tx);
875         if (old_itr != q_vector->tx.itr)
876                 wr32(hw, reg_addr, q_vector->tx.itr);
877 }
878
879 /**
880  * i40e_clean_programming_status - clean the programming status descriptor
881  * @rx_ring: the rx ring that has this descriptor
882  * @rx_desc: the rx descriptor written back by HW
883  *
884  * Flow director should handle FD_FILTER_STATUS to check whether its filter
885  * programming succeeded and take action accordingly. FCoE should handle
886  * its context/filter programming/invalidation status and take action.
887  *
888  **/
889 static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
890                                           union i40e_rx_desc *rx_desc)
891 {
892         u64 qw;
893         u8 id;
894
895         qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
896         id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
897                   I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
898
899         if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
900                 i40e_fd_handle_status(rx_ring, rx_desc, id);
901 }
902
903 /**
904  * i40e_setup_tx_descriptors - Allocate the Tx descriptors
905  * @tx_ring: the tx ring to set up
906  *
907  * Return 0 on success, negative on error
908  **/
909 int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
910 {
911         struct device *dev = tx_ring->dev;
912         int bi_size;
913
914         if (!dev)
915                 return -ENOMEM;
916
917         bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
918         tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
919         if (!tx_ring->tx_bi)
920                 goto err;
921
922         /* round up to nearest 4K */
923         tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
924         /* add u32 for head writeback, align after this takes care of
925          * guaranteeing this is at least one cache line in size
926          */
927         tx_ring->size += sizeof(u32);
928         tx_ring->size = ALIGN(tx_ring->size, 4096);
929         tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
930                                            &tx_ring->dma, GFP_KERNEL);
931         if (!tx_ring->desc) {
932                 dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
933                          tx_ring->size);
934                 goto err;
935         }
936
937         tx_ring->next_to_use = 0;
938         tx_ring->next_to_clean = 0;
939         return 0;
940
941 err:
942         kfree(tx_ring->tx_bi);
943         tx_ring->tx_bi = NULL;
944         return -ENOMEM;
945 }
946
947 /**
948  * i40e_clean_rx_ring - Free Rx buffers
949  * @rx_ring: ring to be cleaned
950  **/
951 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
952 {
953         struct device *dev = rx_ring->dev;
954         struct i40e_rx_buffer *rx_bi;
955         unsigned long bi_size;
956         u16 i;
957
958         /* ring already cleared, nothing to do */
959         if (!rx_ring->rx_bi)
960                 return;
961
962         /* Free all the Rx ring sk_buffs */
963         for (i = 0; i < rx_ring->count; i++) {
964                 rx_bi = &rx_ring->rx_bi[i];
965                 if (rx_bi->dma) {
966                         dma_unmap_single(dev,
967                                          rx_bi->dma,
968                                          rx_ring->rx_buf_len,
969                                          DMA_FROM_DEVICE);
970                         rx_bi->dma = 0;
971                 }
972                 if (rx_bi->skb) {
973                         dev_kfree_skb(rx_bi->skb);
974                         rx_bi->skb = NULL;
975                 }
976                 if (rx_bi->page) {
977                         if (rx_bi->page_dma) {
978                                 dma_unmap_page(dev,
979                                                rx_bi->page_dma,
980                                                PAGE_SIZE / 2,
981                                                DMA_FROM_DEVICE);
982                                 rx_bi->page_dma = 0;
983                         }
984                         __free_page(rx_bi->page);
985                         rx_bi->page = NULL;
986                         rx_bi->page_offset = 0;
987                 }
988         }
989
990         bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
991         memset(rx_ring->rx_bi, 0, bi_size);
992
993         /* Zero out the descriptor ring */
994         memset(rx_ring->desc, 0, rx_ring->size);
995
996         rx_ring->next_to_clean = 0;
997         rx_ring->next_to_use = 0;
998 }
999
1000 /**
1001  * i40e_free_rx_resources - Free Rx resources
1002  * @rx_ring: ring to clean the resources from
1003  *
1004  * Free all receive software resources
1005  **/
1006 void i40e_free_rx_resources(struct i40e_ring *rx_ring)
1007 {
1008         i40e_clean_rx_ring(rx_ring);
1009         kfree(rx_ring->rx_bi);
1010         rx_ring->rx_bi = NULL;
1011
1012         if (rx_ring->desc) {
1013                 dma_free_coherent(rx_ring->dev, rx_ring->size,
1014                                   rx_ring->desc, rx_ring->dma);
1015                 rx_ring->desc = NULL;
1016         }
1017 }
1018
1019 /**
1020  * i40e_setup_rx_descriptors - Allocate Rx descriptors
1021  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
1022  *
1023  * Returns 0 on success, negative on failure
1024  **/
1025 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
1026 {
1027         struct device *dev = rx_ring->dev;
1028         int bi_size;
1029
1030         bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
1031         rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
1032         if (!rx_ring->rx_bi)
1033                 goto err;
1034
1035         /* Round up to nearest 4K */
1036         rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
1037                 ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
1038                 : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
1039         rx_ring->size = ALIGN(rx_ring->size, 4096);
1040         rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
1041                                            &rx_ring->dma, GFP_KERNEL);
1042
1043         if (!rx_ring->desc) {
1044                 dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
1045                          rx_ring->size);
1046                 goto err;
1047         }
1048
1049         rx_ring->next_to_clean = 0;
1050         rx_ring->next_to_use = 0;
1051
1052         return 0;
1053 err:
1054         kfree(rx_ring->rx_bi);
1055         rx_ring->rx_bi = NULL;
1056         return -ENOMEM;
1057 }
1058
1059 /**
1060  * i40e_release_rx_desc - Update the tail register with the new next_to_use value
1061  * @rx_ring: ring to bump
1062  * @val: new next_to_use (tail) index
1063  **/
1064 static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
1065 {
1066         rx_ring->next_to_use = val;
1067         /* Force memory writes to complete before letting h/w
1068          * know there are new descriptors to fetch.  (Only
1069          * applicable for weak-ordered memory model archs,
1070          * such as IA-64).
1071          */
1072         wmb();
1073         writel(val, rx_ring->tail);
1074 }
1075
1076 /**
1077  * i40e_alloc_rx_buffers - Replace used receive buffers; packet split
1078  * @rx_ring: ring to place buffers on
1079  * @cleaned_count: number of buffers to replace
1080  **/
1081 void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
1082 {
1083         u16 i = rx_ring->next_to_use;
1084         union i40e_rx_desc *rx_desc;
1085         struct i40e_rx_buffer *bi;
1086         struct sk_buff *skb;
1087
1088         /* do nothing if no valid netdev defined */
1089         if (!rx_ring->netdev || !cleaned_count)
1090                 return;
1091
1092         while (cleaned_count--) {
1093                 rx_desc = I40E_RX_DESC(rx_ring, i);
1094                 bi = &rx_ring->rx_bi[i];
1095                 skb = bi->skb;
1096
1097                 if (!skb) {
1098                         skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
1099                                                         rx_ring->rx_buf_len);
1100                         if (!skb) {
1101                                 rx_ring->rx_stats.alloc_buff_failed++;
1102                                 goto no_buffers;
1103                         }
1104                         /* initialize queue mapping */
1105                         skb_record_rx_queue(skb, rx_ring->queue_index);
1106                         bi->skb = skb;
1107                 }
1108
1109                 if (!bi->dma) {
1110                         bi->dma = dma_map_single(rx_ring->dev,
1111                                                  skb->data,
1112                                                  rx_ring->rx_buf_len,
1113                                                  DMA_FROM_DEVICE);
1114                         if (dma_mapping_error(rx_ring->dev, bi->dma)) {
1115                                 rx_ring->rx_stats.alloc_buff_failed++;
1116                                 bi->dma = 0;
1117                                 goto no_buffers;
1118                         }
1119                 }
1120
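                /* in packet-split mode post both a header buffer (bi->dma)
                 * and a half-page data buffer (bi->page_dma)
                 */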
1121                 if (ring_is_ps_enabled(rx_ring)) {
1122                         if (!bi->page) {
1123                                 bi->page = alloc_page(GFP_ATOMIC);
1124                                 if (!bi->page) {
1125                                         rx_ring->rx_stats.alloc_page_failed++;
1126                                         goto no_buffers;
1127                                 }
1128                         }
1129
1130                         if (!bi->page_dma) {
1131                                 /* use a half page if we're re-using */
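                                /* XOR flips page_offset between 0 and
                                 * PAGE_SIZE/2 on each reuse
                                 */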
1132                                 bi->page_offset ^= PAGE_SIZE / 2;
1133                                 bi->page_dma = dma_map_page(rx_ring->dev,
1134                                                             bi->page,
1135                                                             bi->page_offset,
1136                                                             PAGE_SIZE / 2,
1137                                                             DMA_FROM_DEVICE);
1138                                 if (dma_mapping_error(rx_ring->dev,
1139                                                       bi->page_dma)) {
1140                                         rx_ring->rx_stats.alloc_page_failed++;
1141                                         bi->page_dma = 0;
1142                                         goto no_buffers;
1143                                 }
1144                         }
1145
1146                         /* Refresh the desc even if buffer_addrs didn't change
1147                          * because each write-back erases this info.
1148                          */
1149                         rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
1150                         rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
1151                 } else {
1152                         rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
1153                         rx_desc->read.hdr_addr = 0;
1154                 }
1155                 i++;
1156                 if (i == rx_ring->count)
1157                         i = 0;
1158         }
1159
1160 no_buffers:
1161         if (rx_ring->next_to_use != i)
1162                 i40e_release_rx_desc(rx_ring, i);
1163 }
1164
1165 /**
1166  * i40e_receive_skb - Send a completed packet up the stack
1167  * @rx_ring:  rx ring in play
1168  * @skb: packet to send up
1169  * @vlan_tag: vlan tag for packet
1170  **/
1171 static void i40e_receive_skb(struct i40e_ring *rx_ring,
1172                              struct sk_buff *skb, u16 vlan_tag)
1173 {
1174         struct i40e_q_vector *q_vector = rx_ring->q_vector;
1175         struct i40e_vsi *vsi = rx_ring->vsi;
1176         u64 flags = vsi->back->flags;
1177
1178         if (vlan_tag & VLAN_VID_MASK)
1179                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
1180
1181         if (flags & I40E_FLAG_IN_NETPOLL)
1182                 netif_rx(skb);
1183         else
1184                 napi_gro_receive(&q_vector->napi, skb);
1185 }
1186
1187 /**
1188  * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
1189  * @vsi: the VSI we care about
1190  * @skb: skb currently being received and modified
1191  * @rx_status: status value of last descriptor in packet
1192  * @rx_error: error value of last descriptor in packet
1193  * @rx_ptype: ptype value of last descriptor in packet
1194  **/
1195 static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
1196                                     struct sk_buff *skb,
1197                                     u32 rx_status,
1198                                     u32 rx_error,
1199                                     u16 rx_ptype)
1200 {
1201         bool ipv4_tunnel, ipv6_tunnel;
1202         __wsum rx_udp_csum;
1203         __sum16 csum;
1204         struct iphdr *iph;
1205
1206         ipv4_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
1207                       (rx_ptype < I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
1208         ipv6_tunnel = (rx_ptype > I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
1209                       (rx_ptype < I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
1210
1211         skb->encapsulation = ipv4_tunnel || ipv6_tunnel;
1212         skb->ip_summed = CHECKSUM_NONE;
1213
1214         /* Rx csum enabled and ip headers found? */
1215         if (!(vsi->netdev->features & NETIF_F_RXCSUM &&
1216               rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
1217                 return;
1218
1219         /* likely incorrect csum if alternate IP extension headers found */
1220         if (rx_status & (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
1221                 return;
1222
1223         /* IP or L4 or outmost IP checksum error */
1224         if (rx_error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1225                         (1 << I40E_RX_DESC_ERROR_L4E_SHIFT) |
1226                         (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT))) {
1227                 vsi->back->hw_csum_rx_error++;
1228                 return;
1229         }
1230
1231         if (ipv4_tunnel &&
1232             !(rx_status & (1 << I40E_RX_DESC_STATUS_UDP_0_SHIFT))) {
1233                 /* If VXLAN traffic has an outer UDPv4 checksum we need to check
1234                  * it in the driver, hardware does not do it for us.
1235                  * Since L3L4P bit was set we assume a valid IHL value (>=5)
1236                  * so the total length of IPv4 header is IHL*4 bytes
1237                  */
1238                 skb->transport_header = skb->mac_header +
1239                                         sizeof(struct ethhdr) +
1240                                         (ip_hdr(skb)->ihl * 4);
1241
1242                 /* Add 4 bytes for VLAN tagged packets */
1243                 skb->transport_header += (skb->protocol == htons(ETH_P_8021Q) ||
1244                                           skb->protocol == htons(ETH_P_8021AD))
1245                                           ? VLAN_HLEN : 0;
1246
1247                 rx_udp_csum = udp_csum(skb);
1248                 iph = ip_hdr(skb);
1249                 csum = csum_tcpudp_magic(
1250                                 iph->saddr, iph->daddr,
1251                                 (skb->len - skb_transport_offset(skb)),
1252                                 IPPROTO_UDP, rx_udp_csum);
1253
1254                 if (udp_hdr(skb)->check != csum) {
1255                         vsi->back->hw_csum_rx_error++;
1256                         return;
1257                 }
1258         }
1259
1260         skb->ip_summed = CHECKSUM_UNNECESSARY;
1261 }
1262
1263 /**
1264  * i40e_rx_hash - returns the hash value from the Rx descriptor
1265  * @ring: descriptor ring
1266  * @rx_desc: specific descriptor
1267  **/
1268 static inline u32 i40e_rx_hash(struct i40e_ring *ring,
1269                                union i40e_rx_desc *rx_desc)
1270 {
1271         const __le64 rss_mask =
1272                 cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
1273                             I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
1274
1275         if ((ring->netdev->features & NETIF_F_RXHASH) &&
1276             (rx_desc->wb.qword1.status_error_len & rss_mask) == rss_mask)
1277                 return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
1278         else
1279                 return 0;
1280 }
1281
1282 /**
1283  * i40e_ptype_to_hash - get a hash type
1284  * @ptype: the ptype value from the descriptor
1285  *
1286  * Returns a hash type to be used by skb_set_hash
1287  **/
1288 static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
1289 {
1290         struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
1291
1292         if (!decoded.known)
1293                 return PKT_HASH_TYPE_NONE;
1294
1295         if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1296             decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
1297                 return PKT_HASH_TYPE_L4;
1298         else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1299                  decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
1300                 return PKT_HASH_TYPE_L3;
1301         else
1302                 return PKT_HASH_TYPE_L2;
1303 }
1304
1305 /**
1306  * i40e_clean_rx_irq - Reclaim resources after receive completes
1307  * @rx_ring:  rx ring to clean
1308  * @budget:   how many cleans we're allowed
1309  *
1310  * Returns true if there's any budget left (i.e. the clean is finished)
1311  **/
1312 static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
1313 {
1314         unsigned int total_rx_bytes = 0, total_rx_packets = 0;
1315         u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
1316         u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
1317         const int current_node = numa_node_id();
1318         struct i40e_vsi *vsi = rx_ring->vsi;
1319         u16 i = rx_ring->next_to_clean;
1320         union i40e_rx_desc *rx_desc;
1321         u32 rx_error, rx_status;
1322         u8 rx_ptype;
1323         u64 qword;
1324
1325         if (budget <= 0)
1326                 return 0;
1327
1328         rx_desc = I40E_RX_DESC(rx_ring, i);
1329         qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1330         rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1331                     I40E_RXD_QW1_STATUS_SHIFT;
1332
1333         while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) {
1334                 union i40e_rx_desc *next_rxd;
1335                 struct i40e_rx_buffer *rx_bi;
1336                 struct sk_buff *skb;
1337                 u16 vlan_tag;
1338                 if (i40e_rx_is_programming_status(qword)) {
1339                         i40e_clean_programming_status(rx_ring, rx_desc);
1340                         I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
1341                         goto next_desc;
1342                 }
1343                 rx_bi = &rx_ring->rx_bi[i];
1344                 skb = rx_bi->skb;
1345                 prefetch(skb->data);
1346
1347                 rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
1348                                 I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1349                 rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
1350                                 I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1351                 rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
1352                          I40E_RXD_QW1_LENGTH_SPH_SHIFT;
1353
1354                 rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
1355                            I40E_RXD_QW1_ERROR_SHIFT;
1356                 rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1357                 rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
1358
1359                 rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
1360                            I40E_RXD_QW1_PTYPE_SHIFT;
1361                 rx_bi->skb = NULL;
1362
1363                 /* This memory barrier is needed to keep us from reading
1364                  * any other fields out of the rx_desc until we know the
1365                  * STATUS_DD bit is set
1366                  */
1367                 rmb();
1368
1369                 /* Get the header and possibly the whole packet. If this
1370                  * is an skb carried over from a previous receive, dma is 0.
1371                  */
1372                 if (rx_bi->dma) {
1373                         u16 len;
1374
1375                         if (rx_hbo)
1376                                 len = I40E_RX_HDR_SIZE;
1377                         else if (rx_sph)
1378                                 len = rx_header_len;
1379                         else if (rx_packet_len)
1380                                 len = rx_packet_len;   /* 1buf/no split found */
1381                         else
1382                                 len = rx_header_len;   /* split always mode */
1383
1384                         skb_put(skb, len);
1385                         dma_unmap_single(rx_ring->dev,
1386                                          rx_bi->dma,
1387                                          rx_ring->rx_buf_len,
1388                                          DMA_FROM_DEVICE);
1389                         rx_bi->dma = 0;
1390                 }
1391
1392                 /* Get the rest of the data if this was a header split */
1393                 if (ring_is_ps_enabled(rx_ring) && rx_packet_len) {
1394
1395                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
1396                                            rx_bi->page,
1397                                            rx_bi->page_offset,
1398                                            rx_packet_len);
1399
1400                         skb->len += rx_packet_len;
1401                         skb->data_len += rx_packet_len;
1402                         skb->truesize += rx_packet_len;
1403
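                        /* keep the half page for reuse only if we are its
                         * sole user and it is local to this NUMA node;
                         * otherwise let the alloc path map in a fresh page
                         */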
1404                         if ((page_count(rx_bi->page) == 1) &&
1405                             (page_to_nid(rx_bi->page) == current_node))
1406                                 get_page(rx_bi->page);
1407                         else
1408                                 rx_bi->page = NULL;
1409
1410                         dma_unmap_page(rx_ring->dev,
1411                                        rx_bi->page_dma,
1412                                        PAGE_SIZE / 2,
1413                                        DMA_FROM_DEVICE);
1414                         rx_bi->page_dma = 0;
1415                 }
1416                 I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
1417
1418                 if (unlikely(
1419                     !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
1420                         struct i40e_rx_buffer *next_buffer;
1421
1422                         next_buffer = &rx_ring->rx_bi[i];
1423
1424                         if (ring_is_ps_enabled(rx_ring)) {
1425                                 rx_bi->skb = next_buffer->skb;
1426                                 rx_bi->dma = next_buffer->dma;
1427                                 next_buffer->skb = skb;
1428                                 next_buffer->dma = 0;
1429                         }
1430                         rx_ring->rx_stats.non_eop_descs++;
1431                         goto next_desc;
1432                 }
1433
1434                 /* ERR_MASK will only have valid bits if EOP set */
1435                 if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1436                         dev_kfree_skb_any(skb);
1437                         goto next_desc;
1438                 }
1439
1440                 skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
1441                              i40e_ptype_to_hash(rx_ptype));
1442                 if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
1443                         i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
1444                                            I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
1445                                            I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
1446                         rx_ring->last_rx_timestamp = jiffies;
1447                 }
1448
1449                 /* probably a little skewed due to removing CRC */
1450                 total_rx_bytes += skb->len;
1451                 total_rx_packets++;
1452
1453                 skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1454
1455                 i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
1456
1457                 vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
1458                          ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
1459                          : 0;
1460                 i40e_receive_skb(rx_ring, skb, vlan_tag);
1461
1462                 rx_ring->netdev->last_rx = jiffies;
1463                 budget--;
1464 next_desc:
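                /* clear the status/length word so a stale DD bit is never
                 * seen the next time this descriptor slot is examined
                 */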
1465                 rx_desc->wb.qword1.status_error_len = 0;
1466                 if (!budget)
1467                         break;
1468
1469                 cleaned_count++;
1470                 /* return some buffers to hardware, one at a time is too slow */
1471                 if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
1472                         i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1473                         cleaned_count = 0;
1474                 }
1475
1476                 /* use prefetched values */
1477                 rx_desc = next_rxd;
1478                 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
1479                 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
1480                             I40E_RXD_QW1_STATUS_SHIFT;
1481         }
1482
1483         rx_ring->next_to_clean = i;
1484         u64_stats_update_begin(&rx_ring->syncp);
1485         rx_ring->stats.packets += total_rx_packets;
1486         rx_ring->stats.bytes += total_rx_bytes;
1487         u64_stats_update_end(&rx_ring->syncp);
1488         rx_ring->q_vector->rx.total_packets += total_rx_packets;
1489         rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
1490
1491         if (cleaned_count)
1492                 i40e_alloc_rx_buffers(rx_ring, cleaned_count);
1493
1494         return budget > 0;
1495 }
1496
1497 /**
1498  * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
1499  * @napi: napi struct with our device's info in it
1500  * @budget: amount of work driver is allowed to do this pass, in packets
1501  *
1502  * This function will clean all queues associated with a q_vector.
1503  *
1504  * Returns the full budget if work is not yet complete, otherwise 0
1505  **/
1506 int i40e_napi_poll(struct napi_struct *napi, int budget)
1507 {
1508         struct i40e_q_vector *q_vector =
1509                                container_of(napi, struct i40e_q_vector, napi);
1510         struct i40e_vsi *vsi = q_vector->vsi;
1511         struct i40e_ring *ring;
1512         bool clean_complete = true;
1513         int budget_per_ring;
1514
1515         if (test_bit(__I40E_DOWN, &vsi->state)) {
1516                 napi_complete(napi);
1517                 return 0;
1518         }
1519
1520         /* Since the actual Tx work is minimal, we can give the Tx a larger
1521          * budget and be more aggressive about cleaning up the Tx descriptors.
1522          */
1523         i40e_for_each_ring(ring, q_vector->tx)
1524                 clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
1525
1526         /* We attempt to distribute budget to each Rx queue fairly, but don't
1527          * allow the budget to go below 1 because that would exit polling early.
1528          */
1529         budget_per_ring = max(budget / q_vector->num_ringpairs, 1);
1530
1531         i40e_for_each_ring(ring, q_vector->rx)
1532                 clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
1533
1534         /* If work not completed, return budget and polling will return */
1535         if (!clean_complete)
1536                 return budget;
1537
1538         /* Work is done so exit the polling mode and re-enable the interrupt */
1539         napi_complete(napi);
1540         if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
1541             ITR_IS_DYNAMIC(vsi->tx_itr_setting))
1542                 i40e_update_dynamic_itr(q_vector);
1543
1544         if (!test_bit(__I40E_DOWN, &vsi->state)) {
1545                 if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
1546                         i40e_irq_dynamic_enable(vsi,
1547                                         q_vector->v_idx + vsi->base_vector);
1548                 } else {
1549                         struct i40e_hw *hw = &vsi->back->hw;
1550                         /* Re-enable the interrupt cause for queue 0. There
1551                          * is no need to touch dynamic_enable here because
1552                          * it was left on for the other possible interrupts
1553                          * during napi.
1554                          */
1555                         u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
1556                         qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
1557                         wr32(hw, I40E_QINT_RQCTL(0), qval);
1558
1559                         qval = rd32(hw, I40E_QINT_TQCTL(0));
1560                         qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
1561                         wr32(hw, I40E_QINT_TQCTL(0), qval);
1562
1563                         i40e_irq_dynamic_enable_icr0(vsi->back);
1564                 }
1565         }
1566
1567         return 0;
1568 }
1569
1570 /**
1571  * i40e_atr - Add a Flow Director ATR filter
1572  * @tx_ring:  ring to add programming descriptor to
1573  * @skb:      send buffer
1574  * @flags:    send flags
1575  * @protocol: wire protocol
1576  **/
1577 static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
1578                      u32 flags, __be16 protocol)
1579 {
1580         struct i40e_filter_program_desc *fdir_desc;
1581         struct i40e_pf *pf = tx_ring->vsi->back;
1582         union {
1583                 unsigned char *network;
1584                 struct iphdr *ipv4;
1585                 struct ipv6hdr *ipv6;
1586         } hdr;
1587         struct tcphdr *th;
1588         unsigned int hlen;
1589         u32 flex_ptype, dtype_cmd;
1590         u16 i;
1591
1592         /* make sure ATR is enabled */
1593         if (!(pf->flags & I40E_FLAG_FD_ATR_ENABLED))
1594                 return;
1595
1596         /* if sampling is disabled do nothing */
1597         if (!tx_ring->atr_sample_rate)
1598                 return;
1599
1600         /* snag network header to get L4 type and address */
1601         hdr.network = skb_network_header(skb);
1602
1603         /* Currently only IPv4/IPv6 with TCP is supported */
1604         if (protocol == htons(ETH_P_IP)) {
1605                 if (hdr.ipv4->protocol != IPPROTO_TCP)
1606                         return;
1607
1608                 /* access ihl as a u8 to avoid unaligned access on ia64 */
1609                 hlen = (hdr.network[0] & 0x0F) << 2;
1610         } else if (protocol == htons(ETH_P_IPV6)) {
1611                 if (hdr.ipv6->nexthdr != IPPROTO_TCP)
1612                         return;
1613
1614                 hlen = sizeof(struct ipv6hdr);
1615         } else {
1616                 return;
1617         }
1618
1619         th = (struct tcphdr *)(hdr.network + hlen);
1620
1621         /* if ATR was auto-disabled (no filter space left), skip new flows */
1622         if (th->syn && (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED))
1623                 return;
1624
1625         tx_ring->atr_count++;
1626
1627         /* sample on all syn/fin packets or once every atr sample rate */
1628         if (!th->fin && !th->syn && (tx_ring->atr_count < tx_ring->atr_sample_rate))
1629                 return;
1630
1631         tx_ring->atr_count = 0;
1632
1633         /* grab the next descriptor */
1634         i = tx_ring->next_to_use;
1635         fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
1636
1637         i++;
1638         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1639
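        /* build the filter programming descriptor fields: destination queue
         * and VSI, packet classifier type (IPv4 or IPv6 TCP), and an
         * add/update command, or a remove command when the flow ends (FIN)
         */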
1640         flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
1641                       I40E_TXD_FLTR_QW0_QINDEX_MASK;
1642         flex_ptype |= (protocol == htons(ETH_P_IP)) ?
1643                       (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
1644                        I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
1645                       (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
1646                        I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
1647
1648         flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
1649
1650         dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
1651
1652         dtype_cmd |= th->fin ?
1653                      (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
1654                       I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
1655                      (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
1656                       I40E_TXD_FLTR_QW1_PCMD_SHIFT);
1657
1658         dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
1659                      I40E_TXD_FLTR_QW1_DEST_SHIFT;
1660
1661         dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
1662                      I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
1663
1664         fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
1665         fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
1666 }
1667
1668 /**
1669  * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
1670  * @skb:     send buffer
1671  * @tx_ring: ring to send buffer on
1672  * @flags:   the tx flags to be set
1673  *
1674  * Checks the skb and sets up the generic transmit flags related to VLAN
1675  * tagging for the HW, such as VLAN, DCB, etc.
1676  *
1677  * Returns an error code to indicate the frame should be dropped upon error,
1678  * otherwise returns 0 to indicate the flags have been set properly.
1679  **/
1680 static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
1681                                       struct i40e_ring *tx_ring,
1682                                       u32 *flags)
1683 {
1684         __be16 protocol = skb->protocol;
1685         u32  tx_flags = 0;
1686
1687         /* if we have a HW VLAN tag being added, default to the HW one */
1688         if (vlan_tx_tag_present(skb)) {
1689                 tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
1690                 tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1691         /* else if it is a SW VLAN, check the next protocol and store the tag */
1692         } else if (protocol == htons(ETH_P_8021Q)) {
1693                 struct vlan_hdr *vhdr, _vhdr;
1694                 vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
1695                 if (!vhdr)
1696                         return -EINVAL;
1697
1698                 protocol = vhdr->h_vlan_encapsulated_proto;
1699                 tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
1700                 tx_flags |= I40E_TX_FLAGS_SW_VLAN;
1701         }
1702
1703         /* Insert 802.1p priority into VLAN header */
1704         if ((tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED) &&
1705             ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
1706              (skb->priority != TC_PRIO_CONTROL))) {
1707                 tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
1708                 tx_flags |= (skb->priority & 0x7) <<
1709                                 I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
1710                 if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
1711                         struct vlan_ethhdr *vhdr;
1712                         if (skb_header_cloned(skb) &&
1713                             pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1714                                 return -ENOMEM;
1715                         vhdr = (struct vlan_ethhdr *)skb->data;
1716                         vhdr->h_vlan_TCI = htons(tx_flags >>
1717                                                  I40E_TX_FLAGS_VLAN_SHIFT);
1718                 } else {
1719                         tx_flags |= I40E_TX_FLAGS_HW_VLAN;
1720                 }
1721         }
1722         *flags = tx_flags;
1723         return 0;
1724 }
1725
1726 /**
1727  * i40e_tso - set up the tso context descriptor
1728  * @tx_ring:  ptr to the ring to send
1729  * @skb:      ptr to the skb we're sending
1730  * @tx_flags: the collected send information
1731  * @protocol: the send protocol
1732  * @hdr_len:  ptr to the size of the packet header
 * @cd_type_cmd_tso_mss: ptr to the Quad Word 1 context descriptor bits
1733  * @cd_tunneling: ptr to context descriptor bits
1734  *
1735  * Returns 0 if no TSO can happen, 1 if TSO is in use, or a negative error code
1736  **/
1737 static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
1738                     u32 tx_flags, __be16 protocol, u8 *hdr_len,
1739                     u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
1740 {
1741         u32 cd_cmd, cd_tso_len, cd_mss;
1742         struct tcphdr *tcph;
1743         struct iphdr *iph;
1744         u32 l4len;
1745         int err;
1746         struct ipv6hdr *ipv6h;
1747
1748         if (!skb_is_gso(skb))
1749                 return 0;
1750
1751         if (skb_header_cloned(skb)) {
1752                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1753                 if (err)
1754                         return err;
1755         }
1756
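        /* zero the IP length fields and seed the TCP checksum with the
         * pseudo-header so the hardware can finish it for each segment
         */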
1757         if (protocol == htons(ETH_P_IP)) {
1758                 iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
1759                 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1760                 iph->tot_len = 0;
1761                 iph->check = 0;
1762                 tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
1763                                                  0, IPPROTO_TCP, 0);
1764         } else if (skb_is_gso_v6(skb)) {
1765
1766                 ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
1767                                            : ipv6_hdr(skb);
1768                 tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
1769                 ipv6h->payload_len = 0;
1770                 tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
1771                                                0, IPPROTO_TCP, 0);
1772         }
1773
1774         l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
1775         *hdr_len = (skb->encapsulation
1776                     ? (skb_inner_transport_header(skb) - skb->data)
1777                     : skb_transport_offset(skb)) + l4len;
1778
1779         /* find the field values */
1780         cd_cmd = I40E_TX_CTX_DESC_TSO;
1781         cd_tso_len = skb->len - *hdr_len;
1782         cd_mss = skb_shinfo(skb)->gso_size;
1783         *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
1784                                 ((u64)cd_tso_len <<
1785                                  I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
1786                                 ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
1787         return 1;
1788 }
1789
1790 /**
1791  * i40e_tsyn - set up the tsyn context descriptor
1792  * @tx_ring:  ptr to the ring to send
1793  * @skb:      ptr to the skb we're sending
1794  * @tx_flags: the collected send information
 * @cd_type_cmd_tso_mss: ptr to the Quad Word 1 context descriptor bits
1795  *
1796  * Returns 1 if a Tx timestamp will be taken for this packet, otherwise 0
1797  **/
1798 static int i40e_tsyn(struct i40e_ring *tx_ring, struct sk_buff *skb,
1799                      u32 tx_flags, u64 *cd_type_cmd_tso_mss)
1800 {
1801         struct i40e_pf *pf;
1802
1803         if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)))
1804                 return 0;
1805
1806         /* Tx timestamps cannot be sampled when doing TSO */
1807         if (tx_flags & I40E_TX_FLAGS_TSO)
1808                 return 0;
1809
1810         /* only timestamp the outbound packet if the user has requested it and
1811          * we are not already transmitting a packet to be timestamped
1812          */
1813         pf = i40e_netdev_to_pf(tx_ring->netdev);
1814         if (pf->ptp_tx && !pf->ptp_tx_skb) {
1815                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1816                 pf->ptp_tx_skb = skb_get(skb);
1817         } else {
1818                 return 0;
1819         }
1820
1821         *cd_type_cmd_tso_mss |= (u64)I40E_TX_CTX_DESC_TSYN <<
1822                                 I40E_TXD_CTX_QW1_CMD_SHIFT;
1823
1824         pf->ptp_tx_start = jiffies;
1825         schedule_work(&pf->ptp_tx_work);
1826
1827         return 1;
1828 }
1829
1830 /**
1831  * i40e_tx_enable_csum - Enable Tx checksum offloads
1832  * @skb: send buffer
1833  * @tx_flags: Tx flags currently set
1834  * @td_cmd: Tx descriptor command bits to set
1835  * @td_offset: Tx descriptor header offsets to set
 * @tx_ring: Tx descriptor ring being used for the send
1836  * @cd_tunneling: ptr to context desc bits
1837  **/
1838 static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
1839                                 u32 *td_cmd, u32 *td_offset,
1840                                 struct i40e_ring *tx_ring,
1841                                 u32 *cd_tunneling)
1842 {
1843         struct ipv6hdr *this_ipv6_hdr;
1844         unsigned int this_tcp_hdrlen;
1845         struct iphdr *this_ip_hdr;
1846         u32 network_hdr_len;
1847         u8 l4_hdr = 0;
1848
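        /* for encapsulated (tunneled) frames the offsets below must describe
         * the inner headers; the outer headers go into the tunneling field
         */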
1849         if (skb->encapsulation) {
1850                 network_hdr_len = skb_inner_network_header_len(skb);
1851                 this_ip_hdr = inner_ip_hdr(skb);
1852                 this_ipv6_hdr = inner_ipv6_hdr(skb);
1853                 this_tcp_hdrlen = inner_tcp_hdrlen(skb);
1854
1855                 if (tx_flags & I40E_TX_FLAGS_IPV4) {
1856
1857                         if (tx_flags & I40E_TX_FLAGS_TSO) {
1858                                 *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
1859                                 ip_hdr(skb)->check = 0;
1860                         } else {
1861                                 *cd_tunneling |=
1862                                          I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
1863                         }
1864                 } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
1865                         /* an outer IPv6 header carries no checksum, so
1866                          * only the extended IP type needs to be set for
1867                          * both the TSO and non-TSO cases
1868                          */
1869                         *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
1870                 }
1873
1874                 /* Now set the ctx descriptor fields */
1875                 *cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
1876                                         I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
1877                                    I40E_TXD_CTX_UDP_TUNNELING            |
1878                                    ((skb_inner_network_offset(skb) -
1879                                         skb_transport_offset(skb)) >> 1) <<
1880                                    I40E_TXD_CTX_QW0_NATLEN_SHIFT;
1881
1882         } else {
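                /* no encapsulation: the outer headers are the only headers */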
1883                 network_hdr_len = skb_network_header_len(skb);
1884                 this_ip_hdr = ip_hdr(skb);
1885                 this_ipv6_hdr = ipv6_hdr(skb);
1886                 this_tcp_hdrlen = tcp_hdrlen(skb);
1887         }
1888
1889         /* Enable IP checksum offloads */
1890         if (tx_flags & I40E_TX_FLAGS_IPV4) {
1891                 l4_hdr = this_ip_hdr->protocol;
1892                 /* the stack computes the IP header already, the only time we
1893                  * need the hardware to recompute it is in the case of TSO.
1894                  */
1895                 if (tx_flags & I40E_TX_FLAGS_TSO) {
1896                         *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
1897                         this_ip_hdr->check = 0;
1898                 } else {
1899                         *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
1900                 }
1901                 /* Now set the td_offset for IP header length */
1902                 *td_offset = (network_hdr_len >> 2) <<
1903                               I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
1904         } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
1905                 l4_hdr = this_ipv6_hdr->nexthdr;
1906                 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
1907                 /* Now set the td_offset for IP header length */
1908                 *td_offset = (network_hdr_len >> 2) <<
1909                               I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
1910         }
1911         /* words in MACLEN + dwords in IPLEN + dwords in L4Len */
1912         *td_offset |= (skb_network_offset(skb) >> 1) <<
1913                        I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
1914
1915         /* Enable L4 checksum offloads */
1916         switch (l4_hdr) {
1917         case IPPROTO_TCP:
1918                 /* enable checksum offloads */
1919                 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
1920                 *td_offset |= (this_tcp_hdrlen >> 2) <<
1921                                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1922                 break;
1923         case IPPROTO_SCTP:
1924                 /* enable SCTP checksum offload */
1925                 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
1926                 *td_offset |= (sizeof(struct sctphdr) >> 2) <<
1927                                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1928                 break;
1929         case IPPROTO_UDP:
1930                 /* enable UDP checksum offload */
1931                 *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
1932                 *td_offset |= (sizeof(struct udphdr) >> 2) <<
1933                                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
1934                 break;
1935         default:
1936                 break;
1937         }
1938 }
1939
1940 /**
1941  * i40e_create_tx_ctx - Build the Tx context descriptor
1942  * @tx_ring:  ring to create the descriptor on
1943  * @cd_type_cmd_tso_mss: Quad Word 1
1944  * @cd_tunneling: Quad Word 0 - bits 0-31
1945  * @cd_l2tag2: Quad Word 0 - bits 32-63
1946  **/
1947 static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
1948                                const u64 cd_type_cmd_tso_mss,
1949                                const u32 cd_tunneling, const u32 cd_l2tag2)
1950 {
1951         struct i40e_tx_context_desc *context_desc;
1952         int i = tx_ring->next_to_use;
1953
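        /* nothing beyond the base context dtype means no descriptor is needed */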
1954         if ((cd_type_cmd_tso_mss == I40E_TX_DESC_DTYPE_CONTEXT) &&
1955             !cd_tunneling && !cd_l2tag2)
1956                 return;
1957
1958         /* grab the next descriptor */
1959         context_desc = I40E_TX_CTXTDESC(tx_ring, i);
1960
1961         i++;
1962         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1963
1964         /* cpu_to_le32 and assign to struct fields */
1965         context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
1966         context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
1967         context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
1968 }
1969
1970 /**
1971  * i40e_tx_map - Build the Tx descriptor
1972  * @tx_ring:  ring to send buffer on
1973  * @skb:      send buffer
1974  * @first:    first buffer info buffer to use
1975  * @tx_flags: collected send information
1976  * @hdr_len:  size of the packet header
1977  * @td_cmd:   the command field in the descriptor
1978  * @td_offset: offset for checksum or crc
1979  **/
1980 static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
1981                         struct i40e_tx_buffer *first, u32 tx_flags,
1982                         const u8 hdr_len, u32 td_cmd, u32 td_offset)
1983 {
1984         unsigned int data_len = skb->data_len;
1985         unsigned int size = skb_headlen(skb);
1986         struct skb_frag_struct *frag;
1987         struct i40e_tx_buffer *tx_bi;
1988         struct i40e_tx_desc *tx_desc;
1989         u16 i = tx_ring->next_to_use;
1990         u32 td_tag = 0;
1991         dma_addr_t dma;
1992         u16 gso_segs;
1993
1994         if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
1995                 td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
1996                 td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
1997                          I40E_TX_FLAGS_VLAN_SHIFT;
1998         }
1999
2000         if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
2001                 gso_segs = skb_shinfo(skb)->gso_segs;
2002         else
2003                 gso_segs = 1;
2004
2005         /* byte count = payload plus one set of headers per segment */
2006         first->bytecount = skb->len - hdr_len + (gso_segs * hdr_len);
2007         first->gso_segs = gso_segs;
2008         first->skb = skb;
2009         first->tx_flags = tx_flags;
2010
2011         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
2012
2013         tx_desc = I40E_TX_DESC(tx_ring, i);
2014         tx_bi = first;
2015
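        /* walk the linear data and then each fragment, splitting any chunk
         * larger than I40E_MAX_DATA_PER_TXD across multiple descriptors
         */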
2016         for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
2017                 if (dma_mapping_error(tx_ring->dev, dma))
2018                         goto dma_error;
2019
2020                 /* record length, and DMA address */
2021                 dma_unmap_len_set(tx_bi, len, size);
2022                 dma_unmap_addr_set(tx_bi, dma, dma);
2023
2024                 tx_desc->buffer_addr = cpu_to_le64(dma);
2025
2026                 while (unlikely(size > I40E_MAX_DATA_PER_TXD)) {
2027                         tx_desc->cmd_type_offset_bsz =
2028                                 build_ctob(td_cmd, td_offset,
2029                                            I40E_MAX_DATA_PER_TXD, td_tag);
2030
2031                         tx_desc++;
2032                         i++;
2033                         if (i == tx_ring->count) {
2034                                 tx_desc = I40E_TX_DESC(tx_ring, 0);
2035                                 i = 0;
2036                         }
2037
2038                         dma += I40E_MAX_DATA_PER_TXD;
2039                         size -= I40E_MAX_DATA_PER_TXD;
2040
2041                         tx_desc->buffer_addr = cpu_to_le64(dma);
2042                 }
2043
2044                 if (likely(!data_len))
2045                         break;
2046
2047                 tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
2048                                                           size, td_tag);
2049
2050                 tx_desc++;
2051                 i++;
2052                 if (i == tx_ring->count) {
2053                         tx_desc = I40E_TX_DESC(tx_ring, 0);
2054                         i = 0;
2055                 }
2056
2057                 size = skb_frag_size(frag);
2058                 data_len -= size;
2059
2060                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
2061                                        DMA_TO_DEVICE);
2062
2063                 tx_bi = &tx_ring->tx_bi[i];
2064         }
2065
2066         /* Only set the RS bit on the last descriptor if the packet ends on a
2067          * write-back stride boundary (WB_STRIDE aka 0x3) or crosses one;
2068          * otherwise EOP alone is enough and write-backs stay batched.
          */
2069 #define WB_STRIDE 0x3
2070         if (((i & WB_STRIDE) != WB_STRIDE) &&
2071             (first <= &tx_ring->tx_bi[i]) &&
2072             (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
2073                 tx_desc->cmd_type_offset_bsz =
2074                         build_ctob(td_cmd, td_offset, size, td_tag) |
2075                         cpu_to_le64((u64)I40E_TX_DESC_CMD_EOP <<
2076                                          I40E_TXD_QW1_CMD_SHIFT);
2077         } else {
2078                 tx_desc->cmd_type_offset_bsz =
2079                         build_ctob(td_cmd, td_offset, size, td_tag) |
2080                         cpu_to_le64((u64)I40E_TXD_CMD <<
2081                                          I40E_TXD_QW1_CMD_SHIFT);
2082         }
2083
2084         netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev,
2085                                                  tx_ring->queue_index),
2086                              first->bytecount);
2087
2088         /* set the timestamp */
2089         first->time_stamp = jiffies;
2090
2091         /* Force memory writes to complete before letting h/w
2092          * know there are new descriptors to fetch.  (Only
2093          * applicable for weak-ordered memory model archs,
2094          * such as IA-64).
2095          */
2096         wmb();
2097
2098         /* set next_to_watch value indicating a packet is present */
2099         first->next_to_watch = tx_desc;
2100
2101         i++;
2102         if (i == tx_ring->count)
2103                 i = 0;
2104
2105         tx_ring->next_to_use = i;
2106
2107         /* notify HW of packet */
2108         writel(i, tx_ring->tail);
2109
2110         return;
2111
2112 dma_error:
2113         dev_info(tx_ring->dev, "TX DMA map failed\n");
2114
2115         /* clear dma mappings for failed tx_bi map */
2116         for (;;) {
2117                 tx_bi = &tx_ring->tx_bi[i];
2118                 i40e_unmap_and_free_tx_resource(tx_ring, tx_bi);
2119                 if (tx_bi == first)
2120                         break;
2121                 if (i == 0)
2122                         i = tx_ring->count;
2123                 i--;
2124         }
2125
2126         tx_ring->next_to_use = i;
2127 }
2128
2129 /**
2130  * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
2131  * @tx_ring: the ring to be checked
2132  * @size:    the size buffer we want to assure is available
2133  *
2134  * Returns -EBUSY if a stop is needed, else 0
2135  **/
2136 static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2137 {
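        /* stop the queue first, then re-check; if the cleanup path freed
         * descriptors in the meantime we restart the queue ourselves below
         */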
2138         netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
2139         /* Memory barrier before checking head and tail */
2140         smp_mb();
2141
2142         /* Check again in a case another CPU has just made room available. */
2143         if (likely(I40E_DESC_UNUSED(tx_ring) < size))
2144                 return -EBUSY;
2145
2146         /* A reprieve! - use start_queue because it doesn't call schedule */
2147         netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
2148         ++tx_ring->tx_stats.restart_queue;
2149         return 0;
2150 }
2151
2152 /**
2153  * i40e_maybe_stop_tx - 1st level check for tx stop conditions
2154  * @tx_ring: the ring to be checked
2155  * @size:    the size buffer we want to assure is available
2156  *
2157  * Returns 0 if stop is not needed
2158  **/
2159 static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
2160 {
2161         if (likely(I40E_DESC_UNUSED(tx_ring) >= size))
2162                 return 0;
2163         return __i40e_maybe_stop_tx(tx_ring, size);
2164 }
2165
2166 /**
2167  * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
2168  * @skb:     send buffer
2169  * @tx_ring: ring to send buffer on
2170  *
2171  * Returns the number of data descriptors needed for this skb. Returns 0 to
2172  * indicate there are not enough descriptors available in this ring since we
2173  * need at least one descriptor.
2174  **/
2175 static int i40e_xmit_descriptor_count(struct sk_buff *skb,
2176                                       struct i40e_ring *tx_ring)
2177 {
2178 #if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
2179         unsigned int f;
2180 #endif
2181         int count = 0;
2182
2183         /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
2184          *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
2185          *       + 4 desc gap to avoid the cache line where head is,
2186          *       + 1 desc for context descriptor,
2187          * otherwise try next time
2188          */
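        /* Illustrative example (assuming 4 KB pages and an
         * I40E_MAX_DATA_PER_TXD larger than PAGE_SIZE): an skb with a
         * 256 byte linear area and 5 page-sized fragments needs
         * 1 + 5 = 6 data descriptors, so 6 + 4 + 1 = 11 free ring entries
         * must be available before the frame is accepted.
         */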
2189 #if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
2190         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
2191                 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
2192 #else
2193         count += skb_shinfo(skb)->nr_frags;
2194 #endif
2195         count += TXD_USE_COUNT(skb_headlen(skb));
2196         if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
2197                 tx_ring->tx_stats.tx_busy++;
2198                 return 0;
2199         }
2200         return count;
2201 }
2202
2203 /**
2204  * i40e_xmit_frame_ring - Sends buffer on Tx ring
2205  * @skb:     send buffer
2206  * @tx_ring: ring to send buffer on
2207  *
2208  * Returns NETDEV_TX_OK if sent, else an error code
2209  **/
2210 static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
2211                                         struct i40e_ring *tx_ring)
2212 {
2213         u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
2214         u32 cd_tunneling = 0, cd_l2tag2 = 0;
2215         struct i40e_tx_buffer *first;
2216         u32 td_offset = 0;
2217         u32 tx_flags = 0;
2218         __be16 protocol;
2219         u32 td_cmd = 0;
2220         u8 hdr_len = 0;
2221         int tsyn;
2222         int tso;

2223         if (i40e_xmit_descriptor_count(skb, tx_ring) == 0)
2224                 return NETDEV_TX_BUSY;
2225
2226         /* prepare the xmit flags */
2227         if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
2228                 goto out_drop;
2229
2230         /* obtain protocol of skb */
2231         protocol = skb->protocol;
2232
2233         /* record the location of the first descriptor for this packet */
2234         first = &tx_ring->tx_bi[tx_ring->next_to_use];
2235
2236         /* setup IPv4/IPv6 offloads */
2237         if (protocol == htons(ETH_P_IP))
2238                 tx_flags |= I40E_TX_FLAGS_IPV4;
2239         else if (protocol == htons(ETH_P_IPV6))
2240                 tx_flags |= I40E_TX_FLAGS_IPV6;
2241
2242         tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
2243                        &cd_type_cmd_tso_mss, &cd_tunneling);
2244
2245         if (tso < 0)
2246                 goto out_drop;
2247         else if (tso)
2248                 tx_flags |= I40E_TX_FLAGS_TSO;
2249
2250         skb_tx_timestamp(skb);
2251
2252         tsyn = i40e_tsyn(tx_ring, skb, tx_flags, &cd_type_cmd_tso_mss);
2253
2254         if (tsyn)
2255                 tx_flags |= I40E_TX_FLAGS_TSYN;
2256
2257         /* always enable CRC insertion offload */
2258         td_cmd |= I40E_TX_DESC_CMD_ICRC;
2259
2260         /* Always offload the checksum, since it's in the data descriptor */
2261         if (skb->ip_summed == CHECKSUM_PARTIAL) {
2262                 tx_flags |= I40E_TX_FLAGS_CSUM;
2263
2264                 i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
2265                                     tx_ring, &cd_tunneling);
2266         }
2267
2268         i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
2269                            cd_tunneling, cd_l2tag2);
2270
2271         /* Add Flow Director ATR if it's enabled.
2272          *
2273          * NOTE: this must always be directly before the data descriptor.
2274          */
2275         i40e_atr(tx_ring, skb, tx_flags, protocol);
2276
2277         i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
2278                     td_cmd, td_offset);
2279
2280         i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
2281
2282         return NETDEV_TX_OK;
2283
2284 out_drop:
2285         dev_kfree_skb_any(skb);
2286         return NETDEV_TX_OK;
2287 }
2288
2289 /**
2290  * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
2291  * @skb:    send buffer
2292  * @netdev: network interface device structure
2293  *
2294  * Returns NETDEV_TX_OK if sent, else an error code
2295  **/
2296 netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
2297 {
2298         struct i40e_netdev_priv *np = netdev_priv(netdev);
2299         struct i40e_vsi *vsi = np->vsi;
2300         struct i40e_ring *tx_ring = vsi->tx_rings[skb->queue_mapping];
2301
2302         /* the hardware can't handle really short frames; pad them to
2303          * I40E_MIN_TX_LEN in software, hardware padding covers the rest
2304          */
2305         if (unlikely(skb->len < I40E_MIN_TX_LEN)) {
2306                 if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len))
2307                         return NETDEV_TX_OK;
2308                 skb->len = I40E_MIN_TX_LEN;
2309                 skb_set_tail_pointer(skb, I40E_MIN_TX_LEN);
2310         }
2311
2312         return i40e_xmit_frame_ring(skb, tx_ring);
2313 }