bnxt_en: add vf-rep RX/TX and netdev implementation
authorSathya Perla <sathya.perla@broadcom.com>
Mon, 24 Jul 2017 16:34:28 +0000 (12:34 -0400)
committerDavid S. Miller <davem@davemloft.net>
Tue, 25 Jul 2017 00:29:58 +0000 (17:29 -0700)
This patch introduces the RX/TX and a simple netdev implementation
for VF-reps. The VF-reps use the RX/TX rings of the PF. For each VF-rep
the PF driver issues a VFR_ALLOC FW cmd that returns "cfa_code"
and "cfa_action" values. The FW sets up the filter tables in such
a way that VF traffic by default (in absence of other rules)
gets punted to the parent PF. The cfa_code value in the RX-compl
informs the driver of the source VF. For traffic being transmitted
from the VF-rep, the TX BD is tagged with a cfa_action value that
informs the HW to punt it to the corresponding VF.

Signed-off-by: Sathya Perla <sathya.perla@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/broadcom/bnxt/bnxt.h
drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.c
drivers/net/ethernet/broadcom/bnxt/bnxt_vfr.h

index ebdeeb4a5756302352f03f663d3879b8c8c3b7e2..f262fe6092d7795a430d5649b2e8561e1b921716 100644 (file)
@@ -245,6 +245,16 @@ const u16 bnxt_lhint_arr[] = {
        TX_BD_FLAGS_LHINT_2048_AND_LARGER,
 };
 
+static u16 bnxt_xmit_get_cfa_action(struct sk_buff *skb)
+{
+       struct metadata_dst *md_dst = skb_metadata_dst(skb);
+
+       if (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)
+               return 0;
+
+       return md_dst->u.port_info.port_id;
+}
+
 static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct bnxt *bp = netdev_priv(dev);
@@ -289,7 +299,7 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
        tx_buf->nr_frags = last_frag;
 
        vlan_tag_flags = 0;
-       cfa_action = 0;
+       cfa_action = bnxt_xmit_get_cfa_action(skb);
        if (skb_vlan_tag_present(skb)) {
                vlan_tag_flags = TX_BD_CFA_META_KEY_VLAN |
                                 skb_vlan_tag_get(skb);
@@ -324,7 +334,8 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
                        tx_push1->tx_bd_hsize_lflags = 0;
 
                tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
-               tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
+               tx_push1->tx_bd_cfa_action =
+                       cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
 
                end = pdata + length;
                end = PTR_ALIGN(end, 8) - 1;
@@ -429,7 +440,8 @@ normal_tx:
        txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
 
        txbd1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags);
-       txbd1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
+       txbd1->tx_bd_cfa_action =
+                       cpu_to_le32(cfa_action << TX_BD_CFA_ACTION_SHIFT);
        for (i = 0; i < last_frag; i++) {
                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
@@ -1034,7 +1046,10 @@ static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
                bnxt_sched_reset(bp, rxr);
                return;
        }
-
+       /* Store cfa_code in tpa_info to use in tpa_end
+        * completion processing.
+        */
+       tpa_info->cfa_code = TPA_START_CFA_CODE(tpa_start1);
        prod_rx_buf->data = tpa_info->data;
        prod_rx_buf->data_ptr = tpa_info->data_ptr;
 
@@ -1269,6 +1284,17 @@ static inline struct sk_buff *bnxt_gro_skb(struct bnxt *bp,
        return skb;
 }
 
+/* Given the cfa_code of a received packet determine which
+ * netdev (vf-rep or PF) the packet is destined to.
+ */
+static struct net_device *bnxt_get_pkt_dev(struct bnxt *bp, u16 cfa_code)
+{
+       struct net_device *dev = bnxt_get_vf_rep(bp, cfa_code);
+
+       /* if vf-rep dev is NULL, the packet must belong to the PF */
+       return dev ? dev : bp->dev;
+}
+
 static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
                                           struct bnxt_napi *bnapi,
                                           u32 *raw_cons,
@@ -1362,7 +1388,9 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
                        return NULL;
                }
        }
-       skb->protocol = eth_type_trans(skb, bp->dev);
+
+       skb->protocol =
+               eth_type_trans(skb, bnxt_get_pkt_dev(bp, tpa_info->cfa_code));
 
        if (tpa_info->hash_type != PKT_HASH_TYPE_NONE)
                skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type);
@@ -1389,6 +1417,18 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
        return skb;
 }
 
+static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi,
+                            struct sk_buff *skb)
+{
+       if (skb->dev != bp->dev) {
+               /* this packet belongs to a vf-rep */
+               bnxt_vf_rep_rx(bp, skb);
+               return;
+       }
+       skb_record_rx_queue(skb, bnapi->index);
+       napi_gro_receive(&bnapi->napi, skb);
+}
+
 /* returns the following:
  * 1       - 1 packet successfully received
  * 0       - successful TPA_START, packet not completed yet
@@ -1405,7 +1445,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
        struct rx_cmp *rxcmp;
        struct rx_cmp_ext *rxcmp1;
        u32 tmp_raw_cons = *raw_cons;
-       u16 cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
+       u16 cfa_code, cons, prod, cp_cons = RING_CMP(tmp_raw_cons);
        struct bnxt_sw_rx_bd *rx_buf;
        unsigned int len;
        u8 *data_ptr, agg_bufs, cmp_type;
@@ -1447,8 +1487,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 
                rc = -ENOMEM;
                if (likely(skb)) {
-                       skb_record_rx_queue(skb, bnapi->index);
-                       napi_gro_receive(&bnapi->napi, skb);
+                       bnxt_deliver_skb(bp, bnapi, skb);
                        rc = 1;
                }
                *event |= BNXT_RX_EVENT;
@@ -1537,7 +1576,8 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
                skb_set_hash(skb, le32_to_cpu(rxcmp->rx_cmp_rss_hash), type);
        }
 
-       skb->protocol = eth_type_trans(skb, dev);
+       cfa_code = RX_CMP_CFA_CODE(rxcmp1);
+       skb->protocol = eth_type_trans(skb, bnxt_get_pkt_dev(bp, cfa_code));
 
        if ((rxcmp1->rx_cmp_flags2 &
             cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) &&
@@ -1562,8 +1602,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
                }
        }
 
-       skb_record_rx_queue(skb, bnapi->index);
-       napi_gro_receive(&bnapi->napi, skb);
+       bnxt_deliver_skb(bp, bnapi, skb);
        rc = 1;
 
 next_rx:
@@ -6246,6 +6285,9 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
        /* Poll link status and check for SFP+ module status */
        bnxt_get_port_module_status(bp);
 
+       /* VF-reps may need to be re-opened after the PF is re-opened */
+       if (BNXT_PF(bp))
+               bnxt_vf_reps_open(bp);
        return 0;
 
 open_err:
@@ -6334,6 +6376,10 @@ int bnxt_close_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
                if (rc)
                        netdev_warn(bp->dev, "timeout waiting for SRIOV config operation to complete!\n");
        }
+
+       /* Close the VF-reps before closing PF */
+       if (BNXT_PF(bp))
+               bnxt_vf_reps_close(bp);
 #endif
        /* Change device state to avoid TX queue wake up's */
        bnxt_tx_disable(bp);
index a7d5f42fb6a3241350a320f5dbb2e8fcb1b9dc93..63756f0389d71e1d5ebe20cdef66661ef1aa40ed 100644 (file)
@@ -20,6 +20,7 @@
 
 #include <linux/interrupt.h>
 #include <net/devlink.h>
+#include <net/dst_metadata.h>
 
 struct tx_bd {
        __le32 tx_bd_len_flags_type;
@@ -243,6 +244,10 @@ struct rx_cmp_ext {
            ((le32_to_cpu((rxcmp1)->rx_cmp_flags2) &                    \
             RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3)
 
+#define RX_CMP_CFA_CODE(rxcmpl1)                                       \
+       ((le32_to_cpu((rxcmpl1)->rx_cmp_cfa_code_errors_v2) &           \
+         RX_CMPL_CFA_CODE_MASK) >> RX_CMPL_CFA_CODE_SFT)
+
 struct rx_agg_cmp {
        __le32 rx_agg_cmp_len_flags_type;
        #define RX_AGG_CMP_TYPE                                 (0x3f << 0)
@@ -312,6 +317,10 @@ struct rx_tpa_start_cmp_ext {
        __le32 rx_tpa_start_cmp_hdr_info;
 };
 
+#define TPA_START_CFA_CODE(rx_tpa_start)                               \
+       ((le32_to_cpu((rx_tpa_start)->rx_tpa_start_cmp_cfa_code_v2) &   \
+        RX_TPA_START_CMP_CFA_CODE) >> RX_TPA_START_CMPL_CFA_CODE_SHIFT)
+
 struct rx_tpa_end_cmp {
        __le32 rx_tpa_end_cmp_len_flags_type;
        #define RX_TPA_END_CMP_TYPE                             (0x3f << 0)
@@ -940,6 +949,7 @@ struct bnxt_vf_rep_stats {
 struct bnxt_vf_rep {
        struct bnxt                     *bp;
        struct net_device               *dev;
+       struct metadata_dst             *dst;
        u16                             vf_idx;
        u16                             tx_cfa_action;
        u16                             rx_cfa_code;
index eab358c2ac97973ade39cbea663dacf03f0f0308..60bdb181358e17baa53f8c5f29f8c739b205f644 100644 (file)
 #include "bnxt_vfr.h"
 
 #define CFA_HANDLE_INVALID             0xffff
+#define VF_IDX_INVALID                 0xffff
+
+static int hwrm_cfa_vfr_alloc(struct bnxt *bp, u16 vf_idx,
+                             u16 *tx_cfa_action, u16 *rx_cfa_code)
+{
+       struct hwrm_cfa_vfr_alloc_output *resp = bp->hwrm_cmd_resp_addr;
+       struct hwrm_cfa_vfr_alloc_input req = { 0 };
+       int rc;
+
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_ALLOC, -1, -1);
+       req.vf_id = cpu_to_le16(vf_idx);
+       sprintf(req.vfr_name, "vfr%d", vf_idx);
+
+       mutex_lock(&bp->hwrm_cmd_lock);
+       rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       if (!rc) {
+               *tx_cfa_action = le16_to_cpu(resp->tx_cfa_action);
+               *rx_cfa_code = le16_to_cpu(resp->rx_cfa_code);
+               netdev_dbg(bp->dev, "tx_cfa_action=0x%x, rx_cfa_code=0x%x",
+                          *tx_cfa_action, *rx_cfa_code);
+       } else {
+               netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+       }
+
+       mutex_unlock(&bp->hwrm_cmd_lock);
+       return rc;
+}
+
+static int hwrm_cfa_vfr_free(struct bnxt *bp, u16 vf_idx)
+{
+       struct hwrm_cfa_vfr_free_input req = { 0 };
+       int rc;
+
+       bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_VFR_FREE, -1, -1);
+       sprintf(req.vfr_name, "vfr%d", vf_idx);
+
+       rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+       if (rc)
+               netdev_info(bp->dev, "%s error rc=%d", __func__, rc);
+       return rc;
+}
+
+static int bnxt_vf_rep_open(struct net_device *dev)
+{
+       struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+       struct bnxt *bp = vf_rep->bp;
+
+       /* Enable link and TX only if the parent PF is open. */
+       if (netif_running(bp->dev)) {
+               netif_carrier_on(dev);
+               netif_tx_start_all_queues(dev);
+       }
+       return 0;
+}
+
+static int bnxt_vf_rep_close(struct net_device *dev)
+{
+       netif_carrier_off(dev);
+       netif_tx_disable(dev);
+
+       return 0;
+}
+
+static netdev_tx_t bnxt_vf_rep_xmit(struct sk_buff *skb,
+                                   struct net_device *dev)
+{
+       struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+       int rc, len = skb->len;
+
+       skb_dst_drop(skb);
+       dst_hold((struct dst_entry *)vf_rep->dst);
+       skb_dst_set(skb, (struct dst_entry *)vf_rep->dst);
+       skb->dev = vf_rep->dst->u.port_info.lower_dev;
+
+       rc = dev_queue_xmit(skb);
+       if (!rc) {
+               vf_rep->tx_stats.packets++;
+               vf_rep->tx_stats.bytes += len;
+       }
+       return rc;
+}
+
+static void
+bnxt_vf_rep_get_stats64(struct net_device *dev,
+                       struct rtnl_link_stats64 *stats)
+{
+       struct bnxt_vf_rep *vf_rep = netdev_priv(dev);
+
+       stats->rx_packets = vf_rep->rx_stats.packets;
+       stats->rx_bytes = vf_rep->rx_stats.bytes;
+       stats->tx_packets = vf_rep->tx_stats.packets;
+       stats->tx_bytes = vf_rep->tx_stats.bytes;
+}
+
+struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code)
+{
+       u16 vf_idx;
+
+       if (cfa_code && bp->cfa_code_map && BNXT_PF(bp)) {
+               vf_idx = bp->cfa_code_map[cfa_code];
+               if (vf_idx != VF_IDX_INVALID)
+                       return bp->vf_reps[vf_idx]->dev;
+       }
+       return NULL;
+}
+
+void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb)
+{
+       struct bnxt_vf_rep *vf_rep = netdev_priv(skb->dev);
+       struct bnxt_vf_rep_stats *rx_stats;
+
+       rx_stats = &vf_rep->rx_stats;
+       vf_rep->rx_stats.bytes += skb->len;
+       vf_rep->rx_stats.packets++;
+
+       netif_receive_skb(skb);
+}
+
+static void bnxt_vf_rep_get_drvinfo(struct net_device *dev,
+                                   struct ethtool_drvinfo *info)
+{
+       strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver));
+       strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
+}
+
+static const struct ethtool_ops bnxt_vf_rep_ethtool_ops = {
+       .get_drvinfo            = bnxt_vf_rep_get_drvinfo
+};
+
+static const struct net_device_ops bnxt_vf_rep_netdev_ops = {
+       .ndo_open               = bnxt_vf_rep_open,
+       .ndo_stop               = bnxt_vf_rep_close,
+       .ndo_start_xmit         = bnxt_vf_rep_xmit,
+       .ndo_get_stats64        = bnxt_vf_rep_get_stats64
+};
+
+/* Called when the parent PF interface is closed:
+ * As the mode transition from SWITCHDEV to LEGACY
+ * happens under the rtnl_lock() this routine is safe
+ * under the rtnl_lock()
+ */
+void bnxt_vf_reps_close(struct bnxt *bp)
+{
+       struct bnxt_vf_rep *vf_rep;
+       u16 num_vfs, i;
+
+       if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+               return;
+
+       num_vfs = pci_num_vf(bp->pdev);
+       for (i = 0; i < num_vfs; i++) {
+               vf_rep = bp->vf_reps[i];
+               if (netif_running(vf_rep->dev))
+                       bnxt_vf_rep_close(vf_rep->dev);
+       }
+}
+
+/* Called when the parent PF interface is opened (re-opened):
+ * As the mode transition from SWITCHDEV to LEGACY
+ * happens under the rtnl_lock() this routine is safe
+ * under the rtnl_lock()
+ */
+void bnxt_vf_reps_open(struct bnxt *bp)
+{
+       int i;
+
+       if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV)
+               return;
+
+       for (i = 0; i < pci_num_vf(bp->pdev); i++)
+               bnxt_vf_rep_open(bp->vf_reps[i]->dev);
+}
 
 static void __bnxt_vf_reps_destroy(struct bnxt *bp)
 {
@@ -27,6 +199,11 @@ static void __bnxt_vf_reps_destroy(struct bnxt *bp)
        for (i = 0; i < num_vfs; i++) {
                vf_rep = bp->vf_reps[i];
                if (vf_rep) {
+                       dst_release((struct dst_entry *)vf_rep->dst);
+
+                       if (vf_rep->tx_cfa_action != CFA_HANDLE_INVALID)
+                               hwrm_cfa_vfr_free(bp, vf_rep->vf_idx);
+
                        if (vf_rep->dev) {
                                /* if register_netdev failed, then netdev_ops
                                 * would have been set to NULL
@@ -60,6 +237,9 @@ void bnxt_vf_reps_destroy(struct bnxt *bp)
                bnxt_close_nic(bp, false, false);
                closed = true;
        }
+       /* un-publish cfa_code_map so that RX path can't see it anymore */
+       kfree(bp->cfa_code_map);
+       bp->cfa_code_map = NULL;
        bp->eswitch_mode = DEVLINK_ESWITCH_MODE_LEGACY;
 
        if (closed)
@@ -92,6 +272,8 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
 {
        struct net_device *pf_dev = bp->dev;
 
+       dev->netdev_ops = &bnxt_vf_rep_netdev_ops;
+       dev->ethtool_ops = &bnxt_vf_rep_ethtool_ops;
+       /* Just inherit all the features of the parent PF as the VF-R
         * uses the RX/TX rings of the parent PF
         */
@@ -107,7 +289,7 @@ static void bnxt_vf_rep_netdev_init(struct bnxt *bp, struct bnxt_vf_rep *vf_rep,
 
 static int bnxt_vf_reps_create(struct bnxt *bp)
 {
-       u16 num_vfs = pci_num_vf(bp->pdev);
+       u16 *cfa_code_map = NULL, num_vfs = pci_num_vf(bp->pdev);
        struct bnxt_vf_rep *vf_rep;
        struct net_device *dev;
        int rc, i;
@@ -116,6 +298,16 @@ static int bnxt_vf_reps_create(struct bnxt *bp)
        if (!bp->vf_reps)
                return -ENOMEM;
 
+       /* storage for cfa_code to vf-idx mapping */
+       cfa_code_map = kmalloc(sizeof(*bp->cfa_code_map) * MAX_CFA_CODE,
+                              GFP_KERNEL);
+       if (!cfa_code_map) {
+               rc = -ENOMEM;
+               goto err;
+       }
+       for (i = 0; i < MAX_CFA_CODE; i++)
+               cfa_code_map[i] = VF_IDX_INVALID;
+
        for (i = 0; i < num_vfs; i++) {
                dev = alloc_etherdev(sizeof(*vf_rep));
                if (!dev) {
@@ -130,6 +322,26 @@ static int bnxt_vf_reps_create(struct bnxt *bp)
                vf_rep->vf_idx = i;
                vf_rep->tx_cfa_action = CFA_HANDLE_INVALID;
 
+               /* get cfa handles from FW */
+               rc = hwrm_cfa_vfr_alloc(bp, vf_rep->vf_idx,
+                                       &vf_rep->tx_cfa_action,
+                                       &vf_rep->rx_cfa_code);
+               if (rc) {
+                       rc = -ENOLINK;
+                       goto err;
+               }
+               cfa_code_map[vf_rep->rx_cfa_code] = vf_rep->vf_idx;
+
+               vf_rep->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX,
+                                                GFP_KERNEL);
+               if (!vf_rep->dst) {
+                       rc = -ENOMEM;
+                       goto err;
+               }
+               /* only cfa_action is needed to mux a packet while TXing */
+               vf_rep->dst->u.port_info.port_id = vf_rep->tx_cfa_action;
+               vf_rep->dst->u.port_info.lower_dev = bp->dev;
+
                bnxt_vf_rep_netdev_init(bp, vf_rep, dev);
                rc = register_netdev(dev);
                if (rc) {
@@ -139,11 +351,15 @@ static int bnxt_vf_reps_create(struct bnxt *bp)
                }
        }
 
+       /* publish cfa_code_map only after all VF-reps have been initialized */
+       bp->cfa_code_map = cfa_code_map;
        bp->eswitch_mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+       netif_keep_dst(bp->dev);
        return 0;
 
 err:
        netdev_info(bp->dev, "%s error=%d", __func__, rc);
+       kfree(cfa_code_map);
        __bnxt_vf_reps_destroy(bp);
        return rc;
 }
index 310c9c567152f2eeaa1249a937f87cc971f28116..c6cd55afbb89ce0c577ad765640c242d03b59d55 100644 (file)
@@ -34,5 +34,9 @@ static inline void bnxt_link_bp_to_dl(struct devlink *dl, struct bnxt *bp)
 int bnxt_dl_register(struct bnxt *bp);
 void bnxt_dl_unregister(struct bnxt *bp);
 void bnxt_vf_reps_destroy(struct bnxt *bp);
+void bnxt_vf_reps_close(struct bnxt *bp);
+void bnxt_vf_reps_open(struct bnxt *bp);
+void bnxt_vf_rep_rx(struct bnxt *bp, struct sk_buff *skb);
+struct net_device *bnxt_get_vf_rep(struct bnxt *bp, u16 cfa_code);
 
 #endif /* BNXT_VFR_H */